179bdfe76SSatish Balay 2e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 379bdfe76SSatish Balay 4dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6dfbe8321SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 7dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 8dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 9dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 10dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 11dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 12dfbe8321SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 13dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 1593fea6afSBarry Smith 1693fea6afSBarry Smith /* UGLY, ugly, ugly 1787828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 1893fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 1993fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2093fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2193fea6afSBarry Smith into the single precision data structures. 2293fea6afSBarry Smith */ 2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 24dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 25dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 26dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 27dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 28dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2993fea6afSBarry Smith #else 306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3593fea6afSBarry Smith #endif 363b2fbd54SBarry Smith 374a2ae208SSatish Balay #undef __FUNCT__ 384a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 407843d17aSBarry Smith { 417843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42dfbe8321SBarry Smith PetscErrorCode ierr; 43dfbe8321SBarry Smith int i; 4487828ca2SBarry Smith PetscScalar *va,*vb; 457843d17aSBarry Smith Vec vtmp; 467843d17aSBarry Smith 477843d17aSBarry Smith PetscFunctionBegin; 487843d17aSBarry Smith 497843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 501ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 517843d17aSBarry Smith 52ac355199SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 537843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 541ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 557843d17aSBarry Smith 56273d9f13SBarry Smith for (i=0; i<A->m; i++){ 57273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 587843d17aSBarry Smith } 597843d17aSBarry Smith 601ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 611ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 637843d17aSBarry Smith 647843d17aSBarry Smith PetscFunctionReturn(0); 657843d17aSBarry Smith } 667843d17aSBarry Smith 677fc3c18eSBarry Smith EXTERN_C_BEGIN 684a2ae208SSatish Balay #undef __FUNCT__ 694a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70dfbe8321SBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 717fc3c18eSBarry Smith { 727fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73dfbe8321SBarry Smith PetscErrorCode ierr; 747fc3c18eSBarry Smith 757fc3c18eSBarry Smith PetscFunctionBegin; 767fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 787fc3c18eSBarry Smith PetscFunctionReturn(0); 797fc3c18eSBarry Smith } 807fc3c18eSBarry Smith EXTERN_C_END 817fc3c18eSBarry Smith 827fc3c18eSBarry Smith EXTERN_C_BEGIN 834a2ae208SSatish Balay #undef __FUNCT__ 844a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85dfbe8321SBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 867fc3c18eSBarry Smith { 877fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88dfbe8321SBarry Smith PetscErrorCode ierr; 897fc3c18eSBarry Smith 907fc3c18eSBarry Smith PetscFunctionBegin; 917fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 937fc3c18eSBarry Smith PetscFunctionReturn(0); 947fc3c18eSBarry Smith } 957fc3c18eSBarry Smith EXTERN_C_END 967fc3c18eSBarry Smith 97537820f0SBarry Smith /* 98537820f0SBarry Smith Local utility routine that creates a mapping from the global column 9957b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10057b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10157b952d6SSatish Balay length of colmap equals the global matrix length. 10257b952d6SSatish Balay */ 1034a2ae208SSatish Balay #undef __FUNCT__ 1044a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 10657b952d6SSatish Balay { 10757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10857b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 1096849ba73SBarry Smith PetscErrorCode ierr; 1106849ba73SBarry Smith int nbs = B->nbs,i,bs=B->bs; 11157b952d6SSatish Balay 112d64ed03dSBarry Smith PetscFunctionBegin; 113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 114f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11548e59246SSatish Balay for (i=0; i<nbs; i++){ 1160f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11748e59246SSatish Balay } 11848e59246SSatish Balay #else 11982502324SSatish Balay ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 120b0a32e0cSBarry Smith PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12348e59246SSatish Balay #endif 1243a40ed3dSBarry Smith PetscFunctionReturn(0); 12557b952d6SSatish Balay } 12657b952d6SSatish Balay 12780c1aa95SSatish Balay #define CHUNKSIZE 10 12880c1aa95SSatish Balay 129f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13080c1aa95SSatish Balay { \ 13180c1aa95SSatish Balay \ 13280c1aa95SSatish Balay brow = row/bs; \ 13380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13580c1aa95SSatish Balay bcol = col/bs; \ 13680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 137ab26458aSBarry Smith low = 0; high = nrow; \ 138ab26458aSBarry Smith while (high-low > 3) { \ 139ab26458aSBarry Smith t = (low+high)/2; \ 140ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 141ab26458aSBarry Smith else low = t; \ 142ab26458aSBarry Smith } \ 143ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 147eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 148eada6651SSatish Balay else *bap = value; \ 149ac7a638eSSatish Balay goto a_noinsert; \ 15080c1aa95SSatish Balay } \ 15180c1aa95SSatish Balay } \ 15289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 153a45adfd6SMatthew Knepley else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 15480c1aa95SSatish Balay if (nrow >= rmax) { \ 15580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1573eda8832SBarry Smith MatScalar *new_a; \ 15880c1aa95SSatish Balay \ 159a45adfd6SMatthew Knepley if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 16089280ab3SLois Curfman McInnes \ 16180c1aa95SSatish Balay /* malloc new storage space */ \ 1623eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 16382502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 16480c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 16580c1aa95SSatish Balay new_i = new_j + new_nz; \ 16680c1aa95SSatish Balay \ 16780c1aa95SSatish Balay /* copy over old data into new slots */ \ 16880c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 16980c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 17180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1723eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1733eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 17487828ca2SBarry Smith ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1763eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 17780c1aa95SSatish Balay /* free up old matrix storage */ \ 178606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179606d414cSSatish Balay if (!a->singlemalloc) { \ 180606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182606d414cSSatish Balay } \ 18380c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1847c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 18580c1aa95SSatish Balay \ 18680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 18980c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 19080c1aa95SSatish Balay a->reallocs++; \ 19180c1aa95SSatish Balay a->nz++; \ 19280c1aa95SSatish Balay } \ 19380c1aa95SSatish Balay N = nrow++ - 1; \ 19480c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 19580c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 19680c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1973eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 19880c1aa95SSatish Balay } \ 1993eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 20080c1aa95SSatish Balay rp[_i] = bcol; \ 20180c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 202ac7a638eSSatish Balay a_noinsert:; \ 20380c1aa95SSatish Balay ailen[brow] = nrow; \ 20480c1aa95SSatish Balay } 20557b952d6SSatish Balay 206ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207ac7a638eSSatish Balay { \ 208ac7a638eSSatish Balay brow = row/bs; \ 209ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 211ac7a638eSSatish Balay bcol = col/bs; \ 212ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 213ac7a638eSSatish Balay low = 0; high = nrow; \ 214ac7a638eSSatish Balay while (high-low > 3) { \ 215ac7a638eSSatish Balay t = (low+high)/2; \ 216ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 217ac7a638eSSatish Balay else low = t; \ 218ac7a638eSSatish Balay } \ 219ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 220ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 221ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 222ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 223ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 224ac7a638eSSatish Balay else *bap = value; \ 225ac7a638eSSatish Balay goto b_noinsert; \ 226ac7a638eSSatish Balay } \ 227ac7a638eSSatish Balay } \ 22889280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 229a45adfd6SMatthew Knepley else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 230ac7a638eSSatish Balay if (nrow >= rmax) { \ 231ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 23274c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2333eda8832SBarry Smith MatScalar *new_a; \ 234ac7a638eSSatish Balay \ 235a45adfd6SMatthew Knepley if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 23689280ab3SLois Curfman McInnes \ 237ac7a638eSSatish Balay /* malloc new storage space */ \ 2383eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 23982502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 241ac7a638eSSatish Balay new_i = new_j + new_nz; \ 242ac7a638eSSatish Balay \ 243ac7a638eSSatish Balay /* copy over old data into new slots */ \ 244ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 24574c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2483eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2493eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2503eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2523eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253ac7a638eSSatish Balay /* free up old matrix storage */ \ 254606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255606d414cSSatish Balay if (!b->singlemalloc) { \ 256606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258606d414cSSatish Balay } \ 25974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2607c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 261ac7a638eSSatish Balay \ 262ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264b0a32e0cSBarry Smith PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 26574c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 26674c639caSSatish Balay b->reallocs++; \ 26774c639caSSatish Balay b->nz++; \ 268ac7a638eSSatish Balay } \ 269ac7a638eSSatish Balay N = nrow++ - 1; \ 270ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 271ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 272ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2733eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274ac7a638eSSatish Balay } \ 2753eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276ac7a638eSSatish Balay rp[_i] = bcol; \ 277ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 278ac7a638eSSatish Balay b_noinsert:; \ 279ac7a638eSSatish Balay bilen[brow] = nrow; \ 280ac7a638eSSatish Balay } 281ac7a638eSSatish Balay 28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2834a2ae208SSatish Balay #undef __FUNCT__ 2844a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 285dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 28657b952d6SSatish Balay { 2876fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288dfbe8321SBarry Smith PetscErrorCode ierr; 289dfbe8321SBarry Smith int i,N = m*n; 2906fa18ffdSBarry Smith MatScalar *vsingle; 29193fea6afSBarry Smith 29293fea6afSBarry Smith PetscFunctionBegin; 2936fa18ffdSBarry Smith if (N > b->setvalueslen) { 2946fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 29582502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2966fa18ffdSBarry Smith b->setvalueslen = N; 2976fa18ffdSBarry Smith } 2986fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2996fa18ffdSBarry Smith 30093fea6afSBarry Smith for (i=0; i<N; i++) { 30193fea6afSBarry Smith vsingle[i] = v[i]; 30293fea6afSBarry Smith } 30393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 30493fea6afSBarry Smith PetscFunctionReturn(0); 30593fea6afSBarry Smith } 30693fea6afSBarry Smith 3074a2ae208SSatish Balay #undef __FUNCT__ 3084a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 309dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 31093fea6afSBarry Smith { 3116fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 312dfbe8321SBarry Smith PetscErrorCode ierr; 313dfbe8321SBarry Smith int i,N = m*n*b->bs2; 3146fa18ffdSBarry Smith MatScalar *vsingle; 31593fea6afSBarry Smith 31693fea6afSBarry Smith PetscFunctionBegin; 3176fa18ffdSBarry Smith if (N > b->setvalueslen) { 3186fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 31982502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3206fa18ffdSBarry Smith b->setvalueslen = N; 3216fa18ffdSBarry Smith } 3226fa18ffdSBarry Smith vsingle = b->setvaluescopy; 32393fea6afSBarry Smith for (i=0; i<N; i++) { 32493fea6afSBarry Smith vsingle[i] = v[i]; 32593fea6afSBarry Smith } 32693fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 32793fea6afSBarry Smith PetscFunctionReturn(0); 32893fea6afSBarry Smith } 3296fa18ffdSBarry Smith 3304a2ae208SSatish Balay #undef __FUNCT__ 3314a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 332dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 3336fa18ffdSBarry Smith { 3346fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 335dfbe8321SBarry Smith PetscErrorCode ierr; 336dfbe8321SBarry Smith int i,N = m*n; 3376fa18ffdSBarry Smith MatScalar *vsingle; 3386fa18ffdSBarry Smith 3396fa18ffdSBarry Smith PetscFunctionBegin; 3406fa18ffdSBarry Smith if (N > b->setvalueslen) { 3416fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 34282502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3436fa18ffdSBarry Smith b->setvalueslen = N; 3446fa18ffdSBarry Smith } 3456fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3466fa18ffdSBarry Smith for (i=0; i<N; i++) { 3476fa18ffdSBarry Smith vsingle[i] = v[i]; 3486fa18ffdSBarry Smith } 3496fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3506fa18ffdSBarry Smith PetscFunctionReturn(0); 3516fa18ffdSBarry Smith } 3526fa18ffdSBarry Smith 3534a2ae208SSatish Balay #undef __FUNCT__ 3544a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 355dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 3566fa18ffdSBarry Smith { 3576fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 358dfbe8321SBarry Smith PetscErrorCode ierr; 359dfbe8321SBarry Smith int i,N = m*n*b->bs2; 3606fa18ffdSBarry Smith MatScalar *vsingle; 3616fa18ffdSBarry Smith 3626fa18ffdSBarry Smith PetscFunctionBegin; 3636fa18ffdSBarry Smith if (N > b->setvalueslen) { 3646fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 36582502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3666fa18ffdSBarry Smith b->setvalueslen = N; 3676fa18ffdSBarry Smith } 3686fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3696fa18ffdSBarry Smith for (i=0; i<N; i++) { 3706fa18ffdSBarry Smith vsingle[i] = v[i]; 3716fa18ffdSBarry Smith } 3726fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3736fa18ffdSBarry Smith PetscFunctionReturn(0); 3746fa18ffdSBarry Smith } 37593fea6afSBarry Smith #endif 37693fea6afSBarry Smith 3774a2ae208SSatish Balay #undef __FUNCT__ 378e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 379dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 38093fea6afSBarry Smith { 38157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 38293fea6afSBarry Smith MatScalar value; 383273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 384dfbe8321SBarry Smith PetscErrorCode ierr; 385dfbe8321SBarry Smith int i,j,row,col; 386273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 3874fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 3884fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 38957b952d6SSatish Balay 390eada6651SSatish Balay /* Some Variables required in the macro */ 39180c1aa95SSatish Balay Mat A = baij->A; 39280c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 393ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3943eda8832SBarry Smith MatScalar *aa=a->a; 395ac7a638eSSatish Balay 396ac7a638eSSatish Balay Mat B = baij->B; 397ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 398ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3993eda8832SBarry Smith MatScalar *ba=b->a; 400ac7a638eSSatish Balay 401ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 402ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 4033eda8832SBarry Smith MatScalar *ap,*bap; 40480c1aa95SSatish Balay 405d64ed03dSBarry Smith PetscFunctionBegin; 40657b952d6SSatish Balay for (i=0; i<m; i++) { 4075ef9f2a5SBarry Smith if (im[i] < 0) continue; 408aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 409590ac198SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 410639f9d9dSBarry Smith #endif 41157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 41257b952d6SSatish Balay row = im[i] - rstart_orig; 41357b952d6SSatish Balay for (j=0; j<n; j++) { 41457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 41557b952d6SSatish Balay col = in[j] - cstart_orig; 41657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 417f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 41880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 41973959e64SBarry Smith } else if (in[j] < 0) continue; 420aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 421590ac198SBarry Smith else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);} 422639f9d9dSBarry Smith #endif 42357b952d6SSatish Balay else { 42457b952d6SSatish Balay if (mat->was_assembled) { 425905e6a2fSBarry Smith if (!baij->colmap) { 426905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 427905e6a2fSBarry Smith } 428aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4290f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 430bba1ac68SSatish Balay col = col - 1; 43148e59246SSatish Balay #else 432bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 43348e59246SSatish Balay #endif 43457b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 43557b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4368295de27SSatish Balay col = in[j]; 4379bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 4389bf004c3SSatish Balay B = baij->B; 4399bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 4409bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 4419bf004c3SSatish Balay ba=b->a; 442bba1ac68SSatish Balay } else col += in[j]%bs; 4438295de27SSatish Balay } else col = in[j]; 44457b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 44590da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 44690da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 44757b952d6SSatish Balay } 44857b952d6SSatish Balay } 449d64ed03dSBarry Smith } else { 45090f02eecSBarry Smith if (!baij->donotstash) { 451ff2fd236SBarry Smith if (roworiented) { 4526fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453ff2fd236SBarry Smith } else { 4546fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 45557b952d6SSatish Balay } 45657b952d6SSatish Balay } 45757b952d6SSatish Balay } 45890f02eecSBarry Smith } 4593a40ed3dSBarry Smith PetscFunctionReturn(0); 46057b952d6SSatish Balay } 46157b952d6SSatish Balay 4624a2ae208SSatish Balay #undef __FUNCT__ 4634a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 464dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 465ab26458aSBarry Smith { 466ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467f15d580aSBarry Smith const MatScalar *value; 468f15d580aSBarry Smith MatScalar *barray=baij->barray; 469273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 470dfbe8321SBarry Smith PetscErrorCode ierr; 471dfbe8321SBarry Smith int i,j,ii,jj,row,col,rstart=baij->rstart; 472ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 473ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 474ab26458aSBarry Smith 475b16ae2b1SBarry Smith PetscFunctionBegin; 47630793edcSSatish Balay if(!barray) { 47782502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 47882502324SSatish Balay baij->barray = barray; 47930793edcSSatish Balay } 48030793edcSSatish Balay 481ab26458aSBarry Smith if (roworiented) { 482ab26458aSBarry Smith stepval = (n-1)*bs; 483ab26458aSBarry Smith } else { 484ab26458aSBarry Smith stepval = (m-1)*bs; 485ab26458aSBarry Smith } 486ab26458aSBarry Smith for (i=0; i<m; i++) { 4875ef9f2a5SBarry Smith if (im[i] < 0) continue; 488aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 489590ac198SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 490ab26458aSBarry Smith #endif 491ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 492ab26458aSBarry Smith row = im[i] - rstart; 493ab26458aSBarry Smith for (j=0; j<n; j++) { 49415b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 49515b57d14SSatish Balay if ((roworiented) && (n == 1)) { 496f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 49715b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 498f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 49915b57d14SSatish Balay } else { /* Here a copy is required */ 500ab26458aSBarry Smith if (roworiented) { 501ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 502ab26458aSBarry Smith } else { 503ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 504abef11f7SSatish Balay } 50547513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 50647513183SBarry Smith for (jj=0; jj<bs; jj++) { 50730793edcSSatish Balay *barray++ = *value++; 50847513183SBarry Smith } 50947513183SBarry Smith } 51030793edcSSatish Balay barray -=bs2; 51115b57d14SSatish Balay } 512abef11f7SSatish Balay 513abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 514abef11f7SSatish Balay col = in[j] - cstart; 51593fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516ab26458aSBarry Smith } 5175ef9f2a5SBarry Smith else if (in[j] < 0) continue; 518aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 519590ac198SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 520ab26458aSBarry Smith #endif 521ab26458aSBarry Smith else { 522ab26458aSBarry Smith if (mat->was_assembled) { 523ab26458aSBarry Smith if (!baij->colmap) { 524ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 525ab26458aSBarry Smith } 526a5eb4965SSatish Balay 527aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 528aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 529fa46199cSSatish Balay { int data; 5300f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 53129bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 532fa46199cSSatish Balay } 53348e59246SSatish Balay #else 53429bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 535a5eb4965SSatish Balay #endif 53648e59246SSatish Balay #endif 537aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 5380f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 539fa46199cSSatish Balay col = (col - 1)/bs; 54048e59246SSatish Balay #else 541a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 54248e59246SSatish Balay #endif 543ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 544ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 545ab26458aSBarry Smith col = in[j]; 546ab26458aSBarry Smith } 547ab26458aSBarry Smith } 548ab26458aSBarry Smith else col = in[j]; 54993fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 550ab26458aSBarry Smith } 551ab26458aSBarry Smith } 552d64ed03dSBarry Smith } else { 553ab26458aSBarry Smith if (!baij->donotstash) { 554ff2fd236SBarry Smith if (roworiented) { 5556fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556ff2fd236SBarry Smith } else { 5576fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 558ff2fd236SBarry Smith } 559abef11f7SSatish Balay } 560ab26458aSBarry Smith } 561ab26458aSBarry Smith } 5623a40ed3dSBarry Smith PetscFunctionReturn(0); 563ab26458aSBarry Smith } 5646fa18ffdSBarry Smith 5650bdbc534SSatish Balay #define HASH_KEY 0.6180339887 566c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 5676fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 568c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 5694a2ae208SSatish Balay #undef __FUNCT__ 5704a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 571dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 5720bdbc534SSatish Balay { 5730bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 574273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 575dfbe8321SBarry Smith PetscErrorCode ierr; 576dfbe8321SBarry Smith int i,j,row,col; 577273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 578c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 579c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 580329f5518SBarry Smith PetscReal tmp; 5813eda8832SBarry Smith MatScalar **HD = baij->hd,value; 582aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5834a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5844a15367fSSatish Balay #endif 5850bdbc534SSatish Balay 5860bdbc534SSatish Balay PetscFunctionBegin; 5870bdbc534SSatish Balay 5880bdbc534SSatish Balay for (i=0; i<m; i++) { 589aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 59029bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 591590ac198SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 5920bdbc534SSatish Balay #endif 5930bdbc534SSatish Balay row = im[i]; 594c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5950bdbc534SSatish Balay for (j=0; j<n; j++) { 5960bdbc534SSatish Balay col = in[j]; 5976fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5980bdbc534SSatish Balay /* Look up into the Hash Table */ 599c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 600c2760754SSatish Balay h1 = HASH(size,key,tmp); 6010bdbc534SSatish Balay 602c2760754SSatish Balay 603c2760754SSatish Balay idx = h1; 604aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 605187ce0cbSSatish Balay insert_ct++; 606187ce0cbSSatish Balay total_ct++; 607187ce0cbSSatish Balay if (HT[idx] != key) { 608187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 609187ce0cbSSatish Balay if (idx == size) { 610187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 611187ce0cbSSatish Balay if (idx == h1) { 612a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 613187ce0cbSSatish Balay } 614187ce0cbSSatish Balay } 615187ce0cbSSatish Balay } 616187ce0cbSSatish Balay #else 617c2760754SSatish Balay if (HT[idx] != key) { 618c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 619c2760754SSatish Balay if (idx == size) { 620c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 621c2760754SSatish Balay if (idx == h1) { 622a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 623c2760754SSatish Balay } 624c2760754SSatish Balay } 625c2760754SSatish Balay } 626187ce0cbSSatish Balay #endif 627c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 628c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 629c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 6300bdbc534SSatish Balay } 6310bdbc534SSatish Balay } else { 6320bdbc534SSatish Balay if (!baij->donotstash) { 633ff2fd236SBarry Smith if (roworiented) { 6348798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 635ff2fd236SBarry Smith } else { 6368798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 6370bdbc534SSatish Balay } 6380bdbc534SSatish Balay } 6390bdbc534SSatish Balay } 6400bdbc534SSatish Balay } 641aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 642187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 643187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 644187ce0cbSSatish Balay #endif 6450bdbc534SSatish Balay PetscFunctionReturn(0); 6460bdbc534SSatish Balay } 6470bdbc534SSatish Balay 6484a2ae208SSatish Balay #undef __FUNCT__ 6494a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 650dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 6510bdbc534SSatish Balay { 6520bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 653273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 654dfbe8321SBarry Smith PetscErrorCode ierr; 655dfbe8321SBarry Smith int i,j,ii,jj,row,col; 656273d9f13SBarry Smith int rstart=baij->rstart ; 657b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 658c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659329f5518SBarry Smith PetscReal tmp; 6603eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 661f15d580aSBarry Smith const MatScalar *v_t,*value; 662aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 6634a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 6644a15367fSSatish Balay #endif 6650bdbc534SSatish Balay 666d0a41580SSatish Balay PetscFunctionBegin; 667d0a41580SSatish Balay 6680bdbc534SSatish Balay if (roworiented) { 6690bdbc534SSatish Balay stepval = (n-1)*bs; 6700bdbc534SSatish Balay } else { 6710bdbc534SSatish Balay stepval = (m-1)*bs; 6720bdbc534SSatish Balay } 6730bdbc534SSatish Balay for (i=0; i<m; i++) { 674aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 675590ac198SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]); 676590ac198SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1); 6770bdbc534SSatish Balay #endif 6780bdbc534SSatish Balay row = im[i]; 679187ce0cbSSatish Balay v_t = v + i*bs2; 680c2760754SSatish Balay if (row >= rstart && row < rend) { 6810bdbc534SSatish Balay for (j=0; j<n; j++) { 6820bdbc534SSatish Balay col = in[j]; 6830bdbc534SSatish Balay 6840bdbc534SSatish Balay /* Look up into the Hash Table */ 685c2760754SSatish Balay key = row*Nbs+col+1; 686c2760754SSatish Balay h1 = HASH(size,key,tmp); 6870bdbc534SSatish Balay 688c2760754SSatish Balay idx = h1; 689aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 690187ce0cbSSatish Balay total_ct++; 691187ce0cbSSatish Balay insert_ct++; 692187ce0cbSSatish Balay if (HT[idx] != key) { 693187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694187ce0cbSSatish Balay if (idx == size) { 695187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696187ce0cbSSatish Balay if (idx == h1) { 697a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 698187ce0cbSSatish Balay } 699187ce0cbSSatish Balay } 700187ce0cbSSatish Balay } 701187ce0cbSSatish Balay #else 702c2760754SSatish Balay if (HT[idx] != key) { 703c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704c2760754SSatish Balay if (idx == size) { 705c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706c2760754SSatish Balay if (idx == h1) { 707a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 708c2760754SSatish Balay } 709c2760754SSatish Balay } 710c2760754SSatish Balay } 711187ce0cbSSatish Balay #endif 712c2760754SSatish Balay baij_a = HD[idx]; 7130bdbc534SSatish Balay if (roworiented) { 714c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 716187ce0cbSSatish Balay value = v_t; 717187ce0cbSSatish Balay v_t += bs; 718fef45726SSatish Balay if (addv == ADD_VALUES) { 719c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 720c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 721fef45726SSatish Balay baij_a[jj] += *value++; 722b4cc0f5aSSatish Balay } 723b4cc0f5aSSatish Balay } 724fef45726SSatish Balay } else { 725c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 726c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 727fef45726SSatish Balay baij_a[jj] = *value++; 728fef45726SSatish Balay } 729fef45726SSatish Balay } 730fef45726SSatish Balay } 7310bdbc534SSatish Balay } else { 7320bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 733fef45726SSatish Balay if (addv == ADD_VALUES) { 734b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 7350bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 736fef45726SSatish Balay baij_a[jj] += *value++; 737fef45726SSatish Balay } 738fef45726SSatish Balay } 739fef45726SSatish Balay } else { 740fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 742fef45726SSatish Balay baij_a[jj] = *value++; 743fef45726SSatish Balay } 744b4cc0f5aSSatish Balay } 7450bdbc534SSatish Balay } 7460bdbc534SSatish Balay } 7470bdbc534SSatish Balay } 7480bdbc534SSatish Balay } else { 7490bdbc534SSatish Balay if (!baij->donotstash) { 7500bdbc534SSatish Balay if (roworiented) { 7518798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7520bdbc534SSatish Balay } else { 7538798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7540bdbc534SSatish Balay } 7550bdbc534SSatish Balay } 7560bdbc534SSatish Balay } 7570bdbc534SSatish Balay } 758aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 759187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 760187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 761187ce0cbSSatish Balay #endif 7620bdbc534SSatish Balay PetscFunctionReturn(0); 7630bdbc534SSatish Balay } 764133cdb44SSatish Balay 7654a2ae208SSatish Balay #undef __FUNCT__ 7664a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 767dfbe8321SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 768d6de1c52SSatish Balay { 769d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 7706849ba73SBarry Smith PetscErrorCode ierr; 7716849ba73SBarry Smith int bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 77248e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773d6de1c52SSatish Balay 774133cdb44SSatish Balay PetscFunctionBegin; 775d6de1c52SSatish Balay for (i=0; i<m; i++) { 776590ac198SBarry Smith if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 777590ac198SBarry Smith if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 778d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779d6de1c52SSatish Balay row = idxm[i] - bsrstart; 780d6de1c52SSatish Balay for (j=0; j<n; j++) { 781590ac198SBarry Smith if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 782590ac198SBarry Smith if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 783d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 784d6de1c52SSatish Balay col = idxn[j] - bscstart; 78598dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786d64ed03dSBarry Smith } else { 787905e6a2fSBarry Smith if (!baij->colmap) { 788905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789905e6a2fSBarry Smith } 790aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7910f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792fa46199cSSatish Balay data --; 79348e59246SSatish Balay #else 79448e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 79548e59246SSatish Balay #endif 79648e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797d9d09a02SSatish Balay else { 79848e59246SSatish Balay col = data + idxn[j]%bs; 79998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800d6de1c52SSatish Balay } 801d6de1c52SSatish Balay } 802d6de1c52SSatish Balay } 803d64ed03dSBarry Smith } else { 80429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805d6de1c52SSatish Balay } 806d6de1c52SSatish Balay } 8073a40ed3dSBarry Smith PetscFunctionReturn(0); 808d6de1c52SSatish Balay } 809d6de1c52SSatish Balay 8104a2ae208SSatish Balay #undef __FUNCT__ 8114a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 812dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813d6de1c52SSatish Balay { 814d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816dfbe8321SBarry Smith PetscErrorCode ierr; 817dfbe8321SBarry Smith int i,bs2=baij->bs2; 818329f5518SBarry Smith PetscReal sum = 0.0; 8193eda8832SBarry Smith MatScalar *v; 820d6de1c52SSatish Balay 821d64ed03dSBarry Smith PetscFunctionBegin; 822d6de1c52SSatish Balay if (baij->size == 1) { 823064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824d6de1c52SSatish Balay } else { 825d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 826d6de1c52SSatish Balay v = amat->a; 827d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 828aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 829329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 830d6de1c52SSatish Balay #else 831d6de1c52SSatish Balay sum += (*v)*(*v); v++; 832d6de1c52SSatish Balay #endif 833d6de1c52SSatish Balay } 834d6de1c52SSatish Balay v = bmat->a; 835d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 836aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 837329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838d6de1c52SSatish Balay #else 839d6de1c52SSatish Balay sum += (*v)*(*v); v++; 840d6de1c52SSatish Balay #endif 841d6de1c52SSatish Balay } 842064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843064f8208SBarry Smith *nrm = sqrt(*nrm); 844d64ed03dSBarry Smith } else { 84529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 846d6de1c52SSatish Balay } 847d64ed03dSBarry Smith } 8483a40ed3dSBarry Smith PetscFunctionReturn(0); 849d6de1c52SSatish Balay } 85057b952d6SSatish Balay 851bd7f49f5SSatish Balay 852fef45726SSatish Balay /* 853fef45726SSatish Balay Creates the hash table, and sets the table 854fef45726SSatish Balay This table is created only once. 855fef45726SSatish Balay If new entried need to be added to the matrix 856fef45726SSatish Balay then the hash table has to be destroyed and 857fef45726SSatish Balay recreated. 858fef45726SSatish Balay */ 8594a2ae208SSatish Balay #undef __FUNCT__ 8604a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 861dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 862596b8d2eSBarry Smith { 863596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 864596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 865596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 8660bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8676849ba73SBarry Smith PetscErrorCode ierr; 8686849ba73SBarry Smith int size,bs2=baij->bs2,rstart=baij->rstart; 869187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 870fef45726SSatish Balay int *HT,key; 8713eda8832SBarry Smith MatScalar **HD; 872329f5518SBarry Smith PetscReal tmp; 873aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 8744a15367fSSatish Balay int ct=0,max=0; 8754a15367fSSatish Balay #endif 876fef45726SSatish Balay 877d64ed03dSBarry Smith PetscFunctionBegin; 8780bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8790bdbc534SSatish Balay size = baij->ht_size; 880fef45726SSatish Balay 8810bdbc534SSatish Balay if (baij->ht) { 8820bdbc534SSatish Balay PetscFunctionReturn(0); 883596b8d2eSBarry Smith } 8840bdbc534SSatish Balay 885fef45726SSatish Balay /* Allocate Memory for Hash Table */ 886b0a32e0cSBarry Smith ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 887b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 888b9e4cc15SSatish Balay HD = baij->hd; 889a07cd24cSSatish Balay HT = baij->ht; 890b9e4cc15SSatish Balay 891b9e4cc15SSatish Balay 89287828ca2SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8930bdbc534SSatish Balay 894596b8d2eSBarry Smith 895596b8d2eSBarry Smith /* Loop Over A */ 8960bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 897596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8980bdbc534SSatish Balay row = i+rstart; 8990bdbc534SSatish Balay col = aj[j]+cstart; 900596b8d2eSBarry Smith 901187ce0cbSSatish Balay key = row*Nbs + col + 1; 902c2760754SSatish Balay h1 = HASH(size,key,tmp); 9030bdbc534SSatish Balay for (k=0; k<size; k++){ 904958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9050bdbc534SSatish Balay HT[(h1+k)%size] = key; 9060bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 907596b8d2eSBarry Smith break; 908aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 909187ce0cbSSatish Balay } else { 910187ce0cbSSatish Balay ct++; 911187ce0cbSSatish Balay #endif 912596b8d2eSBarry Smith } 913187ce0cbSSatish Balay } 914aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 915187ce0cbSSatish Balay if (k> max) max = k; 916187ce0cbSSatish Balay #endif 917596b8d2eSBarry Smith } 918596b8d2eSBarry Smith } 919596b8d2eSBarry Smith /* Loop Over B */ 9200bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 921596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9220bdbc534SSatish Balay row = i+rstart; 9230bdbc534SSatish Balay col = garray[bj[j]]; 924187ce0cbSSatish Balay key = row*Nbs + col + 1; 925c2760754SSatish Balay h1 = HASH(size,key,tmp); 9260bdbc534SSatish Balay for (k=0; k<size; k++){ 927958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9280bdbc534SSatish Balay HT[(h1+k)%size] = key; 9290bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 930596b8d2eSBarry Smith break; 931aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 932187ce0cbSSatish Balay } else { 933187ce0cbSSatish Balay ct++; 934187ce0cbSSatish Balay #endif 935596b8d2eSBarry Smith } 936187ce0cbSSatish Balay } 937aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 938187ce0cbSSatish Balay if (k> max) max = k; 939187ce0cbSSatish Balay #endif 940596b8d2eSBarry Smith } 941596b8d2eSBarry Smith } 942596b8d2eSBarry Smith 943596b8d2eSBarry Smith /* Print Summary */ 944aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 945c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 946596b8d2eSBarry Smith if (HT[i]) {j++;} 947c38d4ed2SBarry Smith } 948958c9bccSBarry Smith PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max); 949187ce0cbSSatish Balay #endif 9503a40ed3dSBarry Smith PetscFunctionReturn(0); 951596b8d2eSBarry Smith } 95257b952d6SSatish Balay 9534a2ae208SSatish Balay #undef __FUNCT__ 9544a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 955dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 956bbb85fb3SSatish Balay { 957bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 958dfbe8321SBarry Smith PetscErrorCode ierr; 959dfbe8321SBarry Smith int nstash,reallocs; 960bbb85fb3SSatish Balay InsertMode addv; 961bbb85fb3SSatish Balay 962bbb85fb3SSatish Balay PetscFunctionBegin; 963bbb85fb3SSatish Balay if (baij->donotstash) { 964bbb85fb3SSatish Balay PetscFunctionReturn(0); 965bbb85fb3SSatish Balay } 966bbb85fb3SSatish Balay 967bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 968bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 969bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 97029bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 971bbb85fb3SSatish Balay } 972bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 973bbb85fb3SSatish Balay 9748798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9758798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9768798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 977b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 97846680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 979b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 980bbb85fb3SSatish Balay PetscFunctionReturn(0); 981bbb85fb3SSatish Balay } 982bbb85fb3SSatish Balay 9834a2ae208SSatish Balay #undef __FUNCT__ 9844a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 985dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 986bbb85fb3SSatish Balay { 987bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 988a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 9896849ba73SBarry Smith PetscErrorCode ierr; 9906849ba73SBarry Smith int i,j,rstart,ncols,n,flg,bs2=baij->bs2; 9917c922b88SBarry Smith int *row,*col,other_disassembled; 9927c922b88SBarry Smith PetscTruth r1,r2,r3; 9933eda8832SBarry Smith MatScalar *val; 994bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 995bbb85fb3SSatish Balay 996bbb85fb3SSatish Balay PetscFunctionBegin; 997bbb85fb3SSatish Balay if (!baij->donotstash) { 998a2d1c673SSatish Balay while (1) { 9998798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1000a2d1c673SSatish Balay if (!flg) break; 1001a2d1c673SSatish Balay 1002bbb85fb3SSatish Balay for (i=0; i<n;) { 1003bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1004bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1005bbb85fb3SSatish Balay if (j < n) ncols = j-i; 1006bbb85fb3SSatish Balay else ncols = n-i; 1007bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 100893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1009bbb85fb3SSatish Balay i = j; 1010bbb85fb3SSatish Balay } 1011bbb85fb3SSatish Balay } 10128798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1013a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1014a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1015a2d1c673SSatish Balay restore the original flags */ 1016a2d1c673SSatish Balay r1 = baij->roworiented; 1017a2d1c673SSatish Balay r2 = a->roworiented; 1018a2d1c673SSatish Balay r3 = b->roworiented; 10197c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10207c922b88SBarry Smith a->roworiented = PETSC_FALSE; 10217c922b88SBarry Smith b->roworiented = PETSC_FALSE; 1022a2d1c673SSatish Balay while (1) { 10238798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1024a2d1c673SSatish Balay if (!flg) break; 1025a2d1c673SSatish Balay 1026a2d1c673SSatish Balay for (i=0; i<n;) { 1027a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1028a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1029a2d1c673SSatish Balay if (j < n) ncols = j-i; 1030a2d1c673SSatish Balay else ncols = n-i; 103193fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1032a2d1c673SSatish Balay i = j; 1033a2d1c673SSatish Balay } 1034a2d1c673SSatish Balay } 10358798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1036a2d1c673SSatish Balay baij->roworiented = r1; 1037a2d1c673SSatish Balay a->roworiented = r2; 1038a2d1c673SSatish Balay b->roworiented = r3; 1039bbb85fb3SSatish Balay } 1040bbb85fb3SSatish Balay 1041bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1042bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1043bbb85fb3SSatish Balay 1044bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1045bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1046bbb85fb3SSatish Balay /* 1047bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1048bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1049bbb85fb3SSatish Balay */ 1050bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1051bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1052bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1053bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1054bbb85fb3SSatish Balay } 1055bbb85fb3SSatish Balay } 1056bbb85fb3SSatish Balay 1057bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1058bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1059bbb85fb3SSatish Balay } 1060bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1061bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1062bbb85fb3SSatish Balay 1063aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1064bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1065f6275e2eSBarry Smith PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1066bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1067bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1068bbb85fb3SSatish Balay } 1069bbb85fb3SSatish Balay #endif 1070bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1071bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1072bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1073bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1074bbb85fb3SSatish Balay } 1075bbb85fb3SSatish Balay 1076606d414cSSatish Balay if (baij->rowvalues) { 1077606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1078606d414cSSatish Balay baij->rowvalues = 0; 1079606d414cSSatish Balay } 1080bbb85fb3SSatish Balay PetscFunctionReturn(0); 1081bbb85fb3SSatish Balay } 108257b952d6SSatish Balay 10834a2ae208SSatish Balay #undef __FUNCT__ 10844a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 10856849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 108657b952d6SSatish Balay { 108757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1088dfbe8321SBarry Smith PetscErrorCode ierr; 1089dfbe8321SBarry Smith int bs = baij->bs,size = baij->size,rank = baij->rank; 109032077d6dSBarry Smith PetscTruth iascii,isdraw; 1091b0a32e0cSBarry Smith PetscViewer sviewer; 1092f3ef73ceSBarry Smith PetscViewerFormat format; 109357b952d6SSatish Balay 1094d64ed03dSBarry Smith PetscFunctionBegin; 1095f81bd7ccSHong Zhang /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 109632077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1097fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 109832077d6dSBarry Smith if (iascii) { 1099b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1100456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 11014e220ebcSLois Curfman McInnes MatInfo info; 1102d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1103d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1104b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1105273d9f13SBarry Smith rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 11066831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 1107d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1108b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1109d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1110b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1111b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 111257b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 11133a40ed3dSBarry Smith PetscFunctionReturn(0); 1114fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1115b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 11163a40ed3dSBarry Smith PetscFunctionReturn(0); 111704929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 111804929863SHong Zhang PetscFunctionReturn(0); 111957b952d6SSatish Balay } 112057b952d6SSatish Balay } 112157b952d6SSatish Balay 11220f5bd95cSBarry Smith if (isdraw) { 1123b0a32e0cSBarry Smith PetscDraw draw; 112457b952d6SSatish Balay PetscTruth isnull; 1125b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1126b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 112757b952d6SSatish Balay } 112857b952d6SSatish Balay 112957b952d6SSatish Balay if (size == 1) { 1130873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 113157b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1132d64ed03dSBarry Smith } else { 113357b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 113457b952d6SSatish Balay Mat A; 113557b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1136273d9f13SBarry Smith int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11373eda8832SBarry Smith MatScalar *a; 113857b952d6SSatish Balay 1139f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1140f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 114157b952d6SSatish Balay if (!rank) { 1142f204ca49SKris Buschelman ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1143d64ed03dSBarry Smith } else { 1144f204ca49SKris Buschelman ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 114557b952d6SSatish Balay } 1146f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1147f204ca49SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1148b0a32e0cSBarry Smith PetscLogObjectParent(mat,A); 114957b952d6SSatish Balay 115057b952d6SSatish Balay /* copy over the A part */ 115157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 115257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 115382502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 115457b952d6SSatish Balay 115557b952d6SSatish Balay for (i=0; i<mbs; i++) { 115657b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 115757b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 115857b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 115957b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 116057b952d6SSatish Balay for (k=0; k<bs; k++) { 116193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1162cee3aa6bSSatish Balay col++; a += bs; 116357b952d6SSatish Balay } 116457b952d6SSatish Balay } 116557b952d6SSatish Balay } 116657b952d6SSatish Balay /* copy over the B part */ 116757b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 116857b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 116957b952d6SSatish Balay for (i=0; i<mbs; i++) { 117057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 117157b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 117257b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 117357b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 117457b952d6SSatish Balay for (k=0; k<bs; k++) { 117593fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1176cee3aa6bSSatish Balay col++; a += bs; 117757b952d6SSatish Balay } 117857b952d6SSatish Balay } 117957b952d6SSatish Balay } 1180606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11816d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11826d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 118355843e3eSBarry Smith /* 118455843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1185b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 118655843e3eSBarry Smith */ 1187b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1188f14a1c24SBarry Smith if (!rank) { 1189e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11906831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 119157b952d6SSatish Balay } 1192b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 119357b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 119457b952d6SSatish Balay } 11953a40ed3dSBarry Smith PetscFunctionReturn(0); 119657b952d6SSatish Balay } 119757b952d6SSatish Balay 11984a2ae208SSatish Balay #undef __FUNCT__ 11994a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1200dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 120157b952d6SSatish Balay { 1202dfbe8321SBarry Smith PetscErrorCode ierr; 120332077d6dSBarry Smith PetscTruth iascii,isdraw,issocket,isbinary; 120457b952d6SSatish Balay 1205d64ed03dSBarry Smith PetscFunctionBegin; 120632077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1207fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1208b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1209fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 121032077d6dSBarry Smith if (iascii || isdraw || issocket || isbinary) { 12117b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 12125cd90555SBarry Smith } else { 121329bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 121457b952d6SSatish Balay } 12153a40ed3dSBarry Smith PetscFunctionReturn(0); 121657b952d6SSatish Balay } 121757b952d6SSatish Balay 12184a2ae208SSatish Balay #undef __FUNCT__ 12194a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1220dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 122179bdfe76SSatish Balay { 122279bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1223dfbe8321SBarry Smith PetscErrorCode ierr; 122479bdfe76SSatish Balay 1225d64ed03dSBarry Smith PetscFunctionBegin; 1226aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1227b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 122879bdfe76SSatish Balay #endif 12298798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12308798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1231606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 123279bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 123379bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1234aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12350f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 123648e59246SSatish Balay #else 1237606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 123848e59246SSatish Balay #endif 1239606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1240606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1241606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1242606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1243606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1244606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 12456fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 12466fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 12476fa18ffdSBarry Smith #endif 1248606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1249*901853e0SKris Buschelman 1250*901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1251*901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1252*901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1253*901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1254*901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1255*901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 12563a40ed3dSBarry Smith PetscFunctionReturn(0); 125779bdfe76SSatish Balay } 125879bdfe76SSatish Balay 12594a2ae208SSatish Balay #undef __FUNCT__ 12604a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1261dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1262cee3aa6bSSatish Balay { 1263cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1264dfbe8321SBarry Smith PetscErrorCode ierr; 1265dfbe8321SBarry Smith int nt; 1266cee3aa6bSSatish Balay 1267d64ed03dSBarry Smith PetscFunctionBegin; 1268e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1269273d9f13SBarry Smith if (nt != A->n) { 127029bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 127147b4a8eaSLois Curfman McInnes } 1272e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1273273d9f13SBarry Smith if (nt != A->m) { 127429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 127547b4a8eaSLois Curfman McInnes } 127643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1277f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 127843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1279f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 128043a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12813a40ed3dSBarry Smith PetscFunctionReturn(0); 1282cee3aa6bSSatish Balay } 1283cee3aa6bSSatish Balay 12844a2ae208SSatish Balay #undef __FUNCT__ 12854a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1286dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1287cee3aa6bSSatish Balay { 1288cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1289dfbe8321SBarry Smith PetscErrorCode ierr; 1290d64ed03dSBarry Smith 1291d64ed03dSBarry Smith PetscFunctionBegin; 129243a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1293f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 129443a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1295f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12963a40ed3dSBarry Smith PetscFunctionReturn(0); 1297cee3aa6bSSatish Balay } 1298cee3aa6bSSatish Balay 12994a2ae208SSatish Balay #undef __FUNCT__ 13004a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1301dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1302cee3aa6bSSatish Balay { 1303cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1304dfbe8321SBarry Smith PetscErrorCode ierr; 1305cee3aa6bSSatish Balay 1306d64ed03dSBarry Smith PetscFunctionBegin; 1307cee3aa6bSSatish Balay /* do nondiagonal part */ 13087c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1309cee3aa6bSSatish Balay /* send it on its way */ 1310537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1311cee3aa6bSSatish Balay /* do local part */ 13127c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1313cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1314cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1315cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1316639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13173a40ed3dSBarry Smith PetscFunctionReturn(0); 1318cee3aa6bSSatish Balay } 1319cee3aa6bSSatish Balay 13204a2ae208SSatish Balay #undef __FUNCT__ 13214a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1322dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1323cee3aa6bSSatish Balay { 1324cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1325dfbe8321SBarry Smith PetscErrorCode ierr; 1326cee3aa6bSSatish Balay 1327d64ed03dSBarry Smith PetscFunctionBegin; 1328cee3aa6bSSatish Balay /* do nondiagonal part */ 13297c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1330cee3aa6bSSatish Balay /* send it on its way */ 1331537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1332cee3aa6bSSatish Balay /* do local part */ 13337c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1334cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1335cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1336cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1337537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13383a40ed3dSBarry Smith PetscFunctionReturn(0); 1339cee3aa6bSSatish Balay } 1340cee3aa6bSSatish Balay 1341cee3aa6bSSatish Balay /* 1342cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1343cee3aa6bSSatish Balay diagonal block 1344cee3aa6bSSatish Balay */ 13454a2ae208SSatish Balay #undef __FUNCT__ 13464a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1347dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1348cee3aa6bSSatish Balay { 1349cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1350dfbe8321SBarry Smith PetscErrorCode ierr; 1351d64ed03dSBarry Smith 1352d64ed03dSBarry Smith PetscFunctionBegin; 1353273d9f13SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13543a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13553a40ed3dSBarry Smith PetscFunctionReturn(0); 1356cee3aa6bSSatish Balay } 1357cee3aa6bSSatish Balay 13584a2ae208SSatish Balay #undef __FUNCT__ 13594a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1360dfbe8321SBarry Smith PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1361cee3aa6bSSatish Balay { 1362cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1363dfbe8321SBarry Smith PetscErrorCode ierr; 1364d64ed03dSBarry Smith 1365d64ed03dSBarry Smith PetscFunctionBegin; 1366cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1367cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 13683a40ed3dSBarry Smith PetscFunctionReturn(0); 1369cee3aa6bSSatish Balay } 1370026e39d0SSatish Balay 13714a2ae208SSatish Balay #undef __FUNCT__ 13724a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1373dfbe8321SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1374acdf5bf4SSatish Balay { 1375acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 137687828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13776849ba73SBarry Smith PetscErrorCode ierr; 13786849ba73SBarry Smith int bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1379d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1380d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1381acdf5bf4SSatish Balay 1382d64ed03dSBarry Smith PetscFunctionBegin; 138329bbc08cSBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1384acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1385acdf5bf4SSatish Balay 1386acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1387acdf5bf4SSatish Balay /* 1388acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1389acdf5bf4SSatish Balay */ 1390acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1391bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1392bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1393acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1394acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1395acdf5bf4SSatish Balay } 139687828ca2SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1397acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1398acdf5bf4SSatish Balay } 1399acdf5bf4SSatish Balay 140029bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1401d9d09a02SSatish Balay lrow = row - brstart; 1402acdf5bf4SSatish Balay 1403acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1404acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1405acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1406f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1407f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1408acdf5bf4SSatish Balay nztot = nzA + nzB; 1409acdf5bf4SSatish Balay 1410acdf5bf4SSatish Balay cmap = mat->garray; 1411acdf5bf4SSatish Balay if (v || idx) { 1412acdf5bf4SSatish Balay if (nztot) { 1413acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1414acdf5bf4SSatish Balay int imark = -1; 1415acdf5bf4SSatish Balay if (v) { 1416acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1417acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1418d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1419acdf5bf4SSatish Balay else break; 1420acdf5bf4SSatish Balay } 1421acdf5bf4SSatish Balay imark = i; 1422acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1423acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1424acdf5bf4SSatish Balay } 1425acdf5bf4SSatish Balay if (idx) { 1426acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1427acdf5bf4SSatish Balay if (imark > -1) { 1428acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1429bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1430acdf5bf4SSatish Balay } 1431acdf5bf4SSatish Balay } else { 1432acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1433d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1434d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1435acdf5bf4SSatish Balay else break; 1436acdf5bf4SSatish Balay } 1437acdf5bf4SSatish Balay imark = i; 1438acdf5bf4SSatish Balay } 1439d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1440d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1441acdf5bf4SSatish Balay } 1442d64ed03dSBarry Smith } else { 1443d212a18eSSatish Balay if (idx) *idx = 0; 1444d212a18eSSatish Balay if (v) *v = 0; 1445d212a18eSSatish Balay } 1446acdf5bf4SSatish Balay } 1447acdf5bf4SSatish Balay *nz = nztot; 1448f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1449f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14503a40ed3dSBarry Smith PetscFunctionReturn(0); 1451acdf5bf4SSatish Balay } 1452acdf5bf4SSatish Balay 14534a2ae208SSatish Balay #undef __FUNCT__ 14544a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1455dfbe8321SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1456acdf5bf4SSatish Balay { 1457acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1458d64ed03dSBarry Smith 1459d64ed03dSBarry Smith PetscFunctionBegin; 1460acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 146129bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1462acdf5bf4SSatish Balay } 1463acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14643a40ed3dSBarry Smith PetscFunctionReturn(0); 1465acdf5bf4SSatish Balay } 1466acdf5bf4SSatish Balay 14674a2ae208SSatish Balay #undef __FUNCT__ 14684a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1469dfbe8321SBarry Smith PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14705a838052SSatish Balay { 14715a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1472d64ed03dSBarry Smith 1473d64ed03dSBarry Smith PetscFunctionBegin; 14745a838052SSatish Balay *bs = baij->bs; 14753a40ed3dSBarry Smith PetscFunctionReturn(0); 14765a838052SSatish Balay } 14775a838052SSatish Balay 14784a2ae208SSatish Balay #undef __FUNCT__ 14794a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1480dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 148158667388SSatish Balay { 148258667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1483dfbe8321SBarry Smith PetscErrorCode ierr; 1484d64ed03dSBarry Smith 1485d64ed03dSBarry Smith PetscFunctionBegin; 148658667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 148758667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14883a40ed3dSBarry Smith PetscFunctionReturn(0); 148958667388SSatish Balay } 14900ac07820SSatish Balay 14914a2ae208SSatish Balay #undef __FUNCT__ 14924a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1493dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14940ac07820SSatish Balay { 14954e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14964e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1497dfbe8321SBarry Smith PetscErrorCode ierr; 1498329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14990ac07820SSatish Balay 1500d64ed03dSBarry Smith PetscFunctionBegin; 1501f6275e2eSBarry Smith info->block_size = (PetscReal)a->bs; 15024e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 15030e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1504de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 15054e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 15060e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1507de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 15080ac07820SSatish Balay if (flag == MAT_LOCAL) { 15094e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15104e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 15114e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15124e220ebcSLois Curfman McInnes info->memory = isend[3]; 15134e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15140ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1515d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 15164e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15174e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15184e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15194e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15204e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15210ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1522d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 15234e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15244e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15254e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15264e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15274e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1528d41123aaSBarry Smith } else { 152929bbc08cSBarry Smith SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 15300ac07820SSatish Balay } 1531f6275e2eSBarry Smith info->rows_global = (PetscReal)A->M; 1532f6275e2eSBarry Smith info->columns_global = (PetscReal)A->N; 1533f6275e2eSBarry Smith info->rows_local = (PetscReal)A->m; 1534f6275e2eSBarry Smith info->columns_local = (PetscReal)A->N; 15354e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15364e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15374e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15383a40ed3dSBarry Smith PetscFunctionReturn(0); 15390ac07820SSatish Balay } 15400ac07820SSatish Balay 15414a2ae208SSatish Balay #undef __FUNCT__ 15424a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1543dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 154458667388SSatish Balay { 154558667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1546dfbe8321SBarry Smith PetscErrorCode ierr; 154758667388SSatish Balay 1548d64ed03dSBarry Smith PetscFunctionBegin; 154912c028f9SKris Buschelman switch (op) { 155012c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 155112c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 155212c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 155312c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 155412c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 155512c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 155612c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 155798305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155898305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155912c028f9SKris Buschelman break; 156012c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15617c922b88SBarry Smith a->roworiented = PETSC_TRUE; 156298305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 156398305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 156412c028f9SKris Buschelman break; 156512c028f9SKris Buschelman case MAT_ROWS_SORTED: 156612c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 156712c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 1568b0a32e0cSBarry Smith PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 156912c028f9SKris Buschelman break; 157012c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15717c922b88SBarry Smith a->roworiented = PETSC_FALSE; 157298305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 157398305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 157412c028f9SKris Buschelman break; 157512c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15767c922b88SBarry Smith a->donotstash = PETSC_TRUE; 157712c028f9SKris Buschelman break; 157812c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 157929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 158012c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15817c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 158212c028f9SKris Buschelman break; 158377e54ba9SKris Buschelman case MAT_SYMMETRIC: 158477e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15852188ac68SBarry Smith case MAT_HERMITIAN: 15862188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 15872188ac68SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 15882188ac68SBarry Smith break; 15899a4540c5SBarry Smith case MAT_NOT_SYMMETRIC: 15909a4540c5SBarry Smith case MAT_NOT_STRUCTURALLY_SYMMETRIC: 15919a4540c5SBarry Smith case MAT_NOT_HERMITIAN: 15929a4540c5SBarry Smith case MAT_NOT_SYMMETRY_ETERNAL: 159377e54ba9SKris Buschelman break; 159412c028f9SKris Buschelman default: 159529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 1596d64ed03dSBarry Smith } 15973a40ed3dSBarry Smith PetscFunctionReturn(0); 159858667388SSatish Balay } 159958667388SSatish Balay 16004a2ae208SSatish Balay #undef __FUNCT__ 16014a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1602dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 16030ac07820SSatish Balay { 16040ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 16050ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 16060ac07820SSatish Balay Mat B; 1607dfbe8321SBarry Smith PetscErrorCode ierr; 1608dfbe8321SBarry Smith int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 16090ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 16103eda8832SBarry Smith MatScalar *a; 16110ac07820SSatish Balay 1612d64ed03dSBarry Smith PetscFunctionBegin; 161329bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1614f204ca49SKris Buschelman ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1615f204ca49SKris Buschelman ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1616f204ca49SKris Buschelman ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 16170ac07820SSatish Balay 16180ac07820SSatish Balay /* copy over the A part */ 16190ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16200ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 162182502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 16220ac07820SSatish Balay 16230ac07820SSatish Balay for (i=0; i<mbs; i++) { 16240ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16250ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16260ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16270ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 16280ac07820SSatish Balay for (k=0; k<bs; k++) { 162993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16300ac07820SSatish Balay col++; a += bs; 16310ac07820SSatish Balay } 16320ac07820SSatish Balay } 16330ac07820SSatish Balay } 16340ac07820SSatish Balay /* copy over the B part */ 16350ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16360ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16370ac07820SSatish Balay for (i=0; i<mbs; i++) { 16380ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16390ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16400ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16410ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16420ac07820SSatish Balay for (k=0; k<bs; k++) { 164393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16440ac07820SSatish Balay col++; a += bs; 16450ac07820SSatish Balay } 16460ac07820SSatish Balay } 16470ac07820SSatish Balay } 1648606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16490ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16500ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16510ac07820SSatish Balay 16527c922b88SBarry Smith if (matout) { 16530ac07820SSatish Balay *matout = B; 16540ac07820SSatish Balay } else { 1655273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16560ac07820SSatish Balay } 16573a40ed3dSBarry Smith PetscFunctionReturn(0); 16580ac07820SSatish Balay } 16590e95ebc0SSatish Balay 16604a2ae208SSatish Balay #undef __FUNCT__ 16614a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1662dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16630e95ebc0SSatish Balay { 166436c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 166536c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1666dfbe8321SBarry Smith PetscErrorCode ierr; 1667dfbe8321SBarry Smith int s1,s2,s3; 16680e95ebc0SSatish Balay 1669d64ed03dSBarry Smith PetscFunctionBegin; 167036c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 167136c4a09eSSatish Balay if (rr) { 167236c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 167329bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 167436c4a09eSSatish Balay /* Overlap communication with computation. */ 167536c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 167636c4a09eSSatish Balay } 16770e95ebc0SSatish Balay if (ll) { 16780e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 167929bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1680a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16810e95ebc0SSatish Balay } 168236c4a09eSSatish Balay /* scale the diagonal block */ 168336c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 168436c4a09eSSatish Balay 168536c4a09eSSatish Balay if (rr) { 168636c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 168736c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1688a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 168936c4a09eSSatish Balay } 169036c4a09eSSatish Balay 16913a40ed3dSBarry Smith PetscFunctionReturn(0); 16920e95ebc0SSatish Balay } 16930e95ebc0SSatish Balay 16944a2ae208SSatish Balay #undef __FUNCT__ 16954a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1696dfbe8321SBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 16970ac07820SSatish Balay { 16980ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16996849ba73SBarry Smith PetscErrorCode ierr; 17006849ba73SBarry Smith int i,N,*rows,*owners = l->rowners,size = l->size; 1701c1dc657dSBarry Smith int *nprocs,j,idx,nsends,row; 17020ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 17030ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1704a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 17050ac07820SSatish Balay MPI_Comm comm = A->comm; 17060ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17070ac07820SSatish Balay MPI_Status recv_status,*send_status; 17080ac07820SSatish Balay IS istmp; 170935d8aa7fSBarry Smith PetscTruth found; 17100ac07820SSatish Balay 1711d64ed03dSBarry Smith PetscFunctionBegin; 1712f14a1c24SBarry Smith ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 17130ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 17140ac07820SSatish Balay 17150ac07820SSatish Balay /* first count number of contributors to each processor */ 171682502324SSatish Balay ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1717549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1718b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 17190ac07820SSatish Balay for (i=0; i<N; i++) { 17200ac07820SSatish Balay idx = rows[i]; 172135d8aa7fSBarry Smith found = PETSC_FALSE; 17220ac07820SSatish Balay for (j=0; j<size; j++) { 17230ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1724c1dc657dSBarry Smith nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 17250ac07820SSatish Balay } 17260ac07820SSatish Balay } 172729bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 17280ac07820SSatish Balay } 1729c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 17300ac07820SSatish Balay 17310ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1732c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 17330ac07820SSatish Balay 17340ac07820SSatish Balay /* post receives: */ 1735b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1736b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17370ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1738ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17390ac07820SSatish Balay } 17400ac07820SSatish Balay 17410ac07820SSatish Balay /* do sends: 17420ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17430ac07820SSatish Balay the ith processor 17440ac07820SSatish Balay */ 1745b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1746b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1747b0a32e0cSBarry Smith ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 17480ac07820SSatish Balay starts[0] = 0; 1749c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17500ac07820SSatish Balay for (i=0; i<N; i++) { 17510ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17520ac07820SSatish Balay } 17536831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17540ac07820SSatish Balay 17550ac07820SSatish Balay starts[0] = 0; 1756c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17570ac07820SSatish Balay count = 0; 17580ac07820SSatish Balay for (i=0; i<size; i++) { 1759c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1760c1dc657dSBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17610ac07820SSatish Balay } 17620ac07820SSatish Balay } 1763606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17640ac07820SSatish Balay 17650ac07820SSatish Balay base = owners[rank]*bs; 17660ac07820SSatish Balay 17670ac07820SSatish Balay /* wait on receives */ 1768b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 17690ac07820SSatish Balay source = lens + nrecvs; 17700ac07820SSatish Balay count = nrecvs; slen = 0; 17710ac07820SSatish Balay while (count) { 1772ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17730ac07820SSatish Balay /* unpack receives into our local space */ 1774ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17750ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17760ac07820SSatish Balay lens[imdex] = n; 17770ac07820SSatish Balay slen += n; 17780ac07820SSatish Balay count--; 17790ac07820SSatish Balay } 1780606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17810ac07820SSatish Balay 17820ac07820SSatish Balay /* move the data into the send scatter */ 1783b0a32e0cSBarry Smith ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 17840ac07820SSatish Balay count = 0; 17850ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17860ac07820SSatish Balay values = rvalues + i*nmax; 17870ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17880ac07820SSatish Balay lrows[count++] = values[j] - base; 17890ac07820SSatish Balay } 17900ac07820SSatish Balay } 1791606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1792606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1793606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1794606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17950ac07820SSatish Balay 17960ac07820SSatish Balay /* actually zap the local rows */ 1797029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1798b0a32e0cSBarry Smith PetscLogObjectParent(A,istmp); 1799a07cd24cSSatish Balay 180072dacd9aSBarry Smith /* 180172dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 180272dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 180372dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 180472dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 180572dacd9aSBarry Smith 180672dacd9aSBarry Smith Contributed by: Mathew Knepley 180772dacd9aSBarry Smith */ 18089c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 18096fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 18109c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 18116fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 18129c957beeSSatish Balay } else if (diag) { 18136fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1814fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 181529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1816fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18176525c446SSatish Balay } 1818a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1819a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 18203f11ea53SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1821a07cd24cSSatish Balay } 1822a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1823a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18249c957beeSSatish Balay } else { 18256fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1826a07cd24cSSatish Balay } 18279c957beeSSatish Balay 18289c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1829606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1830a07cd24cSSatish Balay 18310ac07820SSatish Balay /* wait on sends */ 18320ac07820SSatish Balay if (nsends) { 183382502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1834ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1835606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18360ac07820SSatish Balay } 1837606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1838606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18390ac07820SSatish Balay 18403a40ed3dSBarry Smith PetscFunctionReturn(0); 18410ac07820SSatish Balay } 184272dacd9aSBarry Smith 18434a2ae208SSatish Balay #undef __FUNCT__ 18444a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1845dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1846ba4ca20aSSatish Balay { 1847ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 184825fdafccSSatish Balay MPI_Comm comm = A->comm; 1849133cdb44SSatish Balay static int called = 0; 1850dfbe8321SBarry Smith PetscErrorCode ierr; 1851ba4ca20aSSatish Balay 1852d64ed03dSBarry Smith PetscFunctionBegin; 18533a40ed3dSBarry Smith if (!a->rank) { 18543a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 185525fdafccSSatish Balay } 185625fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1857d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1858d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18593a40ed3dSBarry Smith PetscFunctionReturn(0); 1860ba4ca20aSSatish Balay } 18610ac07820SSatish Balay 18624a2ae208SSatish Balay #undef __FUNCT__ 18634a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1864dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1865bb5a7306SBarry Smith { 1866bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1867dfbe8321SBarry Smith PetscErrorCode ierr; 1868d64ed03dSBarry Smith 1869d64ed03dSBarry Smith PetscFunctionBegin; 1870bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18713a40ed3dSBarry Smith PetscFunctionReturn(0); 1872bb5a7306SBarry Smith } 1873bb5a7306SBarry Smith 18746849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18750ac07820SSatish Balay 18764a2ae208SSatish Balay #undef __FUNCT__ 18774a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1878dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18797fc3c18eSBarry Smith { 18807fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18817fc3c18eSBarry Smith Mat a,b,c,d; 18827fc3c18eSBarry Smith PetscTruth flg; 1883dfbe8321SBarry Smith PetscErrorCode ierr; 18847fc3c18eSBarry Smith 18857fc3c18eSBarry Smith PetscFunctionBegin; 18867fc3c18eSBarry Smith a = matA->A; b = matA->B; 18877fc3c18eSBarry Smith c = matB->A; d = matB->B; 18887fc3c18eSBarry Smith 18897fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18907fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18917fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18927fc3c18eSBarry Smith } 18937fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18947fc3c18eSBarry Smith PetscFunctionReturn(0); 18957fc3c18eSBarry Smith } 18967fc3c18eSBarry Smith 1897273d9f13SBarry Smith 18984a2ae208SSatish Balay #undef __FUNCT__ 18994a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1900dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1901273d9f13SBarry Smith { 1902dfbe8321SBarry Smith PetscErrorCode ierr; 1903273d9f13SBarry Smith 1904273d9f13SBarry Smith PetscFunctionBegin; 1905273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1906273d9f13SBarry Smith PetscFunctionReturn(0); 1907273d9f13SBarry Smith } 1908273d9f13SBarry Smith 190979bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1910cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1911cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1912cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1913cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1914cc2dc46cSBarry Smith MatMult_MPIBAIJ, 191597304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 19167c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19177c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1918cc2dc46cSBarry Smith 0, 1919cc2dc46cSBarry Smith 0, 1920cc2dc46cSBarry Smith 0, 192197304618SKris Buschelman /*10*/ 0, 1922cc2dc46cSBarry Smith 0, 1923cc2dc46cSBarry Smith 0, 1924cc2dc46cSBarry Smith 0, 1925cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 192697304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 19277fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1928cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1929cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1930cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 193197304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 1932cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1933cc2dc46cSBarry Smith 0, 1934cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1935cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 193697304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ, 1937cc2dc46cSBarry Smith 0, 1938cc2dc46cSBarry Smith 0, 1939cc2dc46cSBarry Smith 0, 1940cc2dc46cSBarry Smith 0, 194197304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ, 1942273d9f13SBarry Smith 0, 1943cc2dc46cSBarry Smith 0, 1944cc2dc46cSBarry Smith 0, 1945cc2dc46cSBarry Smith 0, 194697304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ, 1947cc2dc46cSBarry Smith 0, 1948cc2dc46cSBarry Smith 0, 1949cc2dc46cSBarry Smith 0, 1950cc2dc46cSBarry Smith 0, 195197304618SKris Buschelman /*40*/ 0, 1952cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1953cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1954cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1955cc2dc46cSBarry Smith 0, 195697304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ, 1957cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1958cc2dc46cSBarry Smith 0, 1959cc2dc46cSBarry Smith 0, 1960cc2dc46cSBarry Smith 0, 196197304618SKris Buschelman /*50*/ MatGetBlockSize_MPIBAIJ, 1962cc2dc46cSBarry Smith 0, 1963cc2dc46cSBarry Smith 0, 1964cc2dc46cSBarry Smith 0, 1965cc2dc46cSBarry Smith 0, 196697304618SKris Buschelman /*55*/ 0, 1967cc2dc46cSBarry Smith 0, 1968cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1969cc2dc46cSBarry Smith 0, 1970cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 197197304618SKris Buschelman /*60*/ 0, 1972f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 1973f14a1c24SBarry Smith MatView_MPIBAIJ, 19748a124369SBarry Smith MatGetPetscMaps_Petsc, 19757843d17aSBarry Smith 0, 197697304618SKris Buschelman /*65*/ 0, 19777843d17aSBarry Smith 0, 19787843d17aSBarry Smith 0, 19797843d17aSBarry Smith 0, 19807843d17aSBarry Smith 0, 198197304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ, 19827843d17aSBarry Smith 0, 198397304618SKris Buschelman 0, 198497304618SKris Buschelman 0, 198597304618SKris Buschelman 0, 198697304618SKris Buschelman /*75*/ 0, 198797304618SKris Buschelman 0, 198897304618SKris Buschelman 0, 198997304618SKris Buschelman 0, 199097304618SKris Buschelman 0, 199197304618SKris Buschelman /*80*/ 0, 199297304618SKris Buschelman 0, 199397304618SKris Buschelman 0, 199497304618SKris Buschelman 0, 1995865e5f61SKris Buschelman MatLoad_MPIBAIJ, 1996865e5f61SKris Buschelman /*85*/ 0, 1997865e5f61SKris Buschelman 0, 1998865e5f61SKris Buschelman 0, 1999865e5f61SKris Buschelman 0, 2000865e5f61SKris Buschelman 0, 2001865e5f61SKris Buschelman /*90*/ 0, 2002865e5f61SKris Buschelman 0, 2003865e5f61SKris Buschelman 0, 2004865e5f61SKris Buschelman 0, 2005865e5f61SKris Buschelman 0, 2006865e5f61SKris Buschelman /*95*/ 0, 2007865e5f61SKris Buschelman 0, 2008865e5f61SKris Buschelman 0, 2009865e5f61SKris Buschelman 0}; 201079bdfe76SSatish Balay 20115ef9f2a5SBarry Smith 2012e18c124aSSatish Balay EXTERN_C_BEGIN 20134a2ae208SSatish Balay #undef __FUNCT__ 20144a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2015dfbe8321SBarry Smith PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 20165ef9f2a5SBarry Smith { 20175ef9f2a5SBarry Smith PetscFunctionBegin; 20185ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 20195ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 20205ef9f2a5SBarry Smith PetscFunctionReturn(0); 20215ef9f2a5SBarry Smith } 2022e18c124aSSatish Balay EXTERN_C_END 202379bdfe76SSatish Balay 2024273d9f13SBarry Smith EXTERN_C_BEGIN 2025d94109b8SHong Zhang extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 2026d94109b8SHong Zhang EXTERN_C_END 2027d94109b8SHong Zhang 2028d94109b8SHong Zhang EXTERN_C_BEGIN 20294a2ae208SSatish Balay #undef __FUNCT__ 2030a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2031dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2032a23d5eceSKris Buschelman { 2033a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2034dfbe8321SBarry Smith PetscErrorCode ierr; 2035dfbe8321SBarry Smith int i; 2036a23d5eceSKris Buschelman 2037a23d5eceSKris Buschelman PetscFunctionBegin; 2038a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 2039a23d5eceSKris Buschelman ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2040a23d5eceSKris Buschelman 2041a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2042a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2043a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2044a23d5eceSKris Buschelman if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2045a23d5eceSKris Buschelman if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2046a23d5eceSKris Buschelman if (d_nnz) { 2047a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 2048a23d5eceSKris Buschelman if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2049a23d5eceSKris Buschelman } 2050a23d5eceSKris Buschelman } 2051a23d5eceSKris Buschelman if (o_nnz) { 2052a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 2053a23d5eceSKris Buschelman if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2054a23d5eceSKris Buschelman } 2055a23d5eceSKris Buschelman } 2056a23d5eceSKris Buschelman 2057a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2058a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2059a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2060a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2061a23d5eceSKris Buschelman 2062a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2063a23d5eceSKris Buschelman b->bs = bs; 2064a23d5eceSKris Buschelman b->bs2 = bs*bs; 2065a23d5eceSKris Buschelman b->mbs = B->m/bs; 2066a23d5eceSKris Buschelman b->nbs = B->n/bs; 2067a23d5eceSKris Buschelman b->Mbs = B->M/bs; 2068a23d5eceSKris Buschelman b->Nbs = B->N/bs; 2069a23d5eceSKris Buschelman 2070a23d5eceSKris Buschelman ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2071a23d5eceSKris Buschelman b->rowners[0] = 0; 2072a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2073a23d5eceSKris Buschelman b->rowners[i] += b->rowners[i-1]; 2074a23d5eceSKris Buschelman } 2075a23d5eceSKris Buschelman b->rstart = b->rowners[b->rank]; 2076a23d5eceSKris Buschelman b->rend = b->rowners[b->rank+1]; 2077a23d5eceSKris Buschelman 2078a23d5eceSKris Buschelman ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2079a23d5eceSKris Buschelman b->cowners[0] = 0; 2080a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2081a23d5eceSKris Buschelman b->cowners[i] += b->cowners[i-1]; 2082a23d5eceSKris Buschelman } 2083a23d5eceSKris Buschelman b->cstart = b->cowners[b->rank]; 2084a23d5eceSKris Buschelman b->cend = b->cowners[b->rank+1]; 2085a23d5eceSKris Buschelman 2086a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2087a23d5eceSKris Buschelman b->rowners_bs[i] = b->rowners[i]*bs; 2088a23d5eceSKris Buschelman } 2089a23d5eceSKris Buschelman b->rstart_bs = b->rstart*bs; 2090a23d5eceSKris Buschelman b->rend_bs = b->rend*bs; 2091a23d5eceSKris Buschelman b->cstart_bs = b->cstart*bs; 2092a23d5eceSKris Buschelman b->cend_bs = b->cend*bs; 2093a23d5eceSKris Buschelman 20949c097c71SKris Buschelman ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 20959c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2096c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 20979c097c71SKris Buschelman PetscLogObjectParent(B,b->A); 20989c097c71SKris Buschelman ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 20999c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2100c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 21019c097c71SKris Buschelman PetscLogObjectParent(B,b->B); 2102c60e587dSKris Buschelman 2103a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2104a23d5eceSKris Buschelman 2105a23d5eceSKris Buschelman PetscFunctionReturn(0); 2106a23d5eceSKris Buschelman } 2107a23d5eceSKris Buschelman EXTERN_C_END 2108a23d5eceSKris Buschelman 2109a23d5eceSKris Buschelman EXTERN_C_BEGIN 2110dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2111dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 211292b32695SKris Buschelman EXTERN_C_END 21135bf65638SKris Buschelman 21140bad9183SKris Buschelman /*MC 2115fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 21160bad9183SKris Buschelman 21170bad9183SKris Buschelman Options Database Keys: 21180bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 21190bad9183SKris Buschelman 21200bad9183SKris Buschelman Level: beginner 21210bad9183SKris Buschelman 21220bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 21230bad9183SKris Buschelman M*/ 21240bad9183SKris Buschelman 212592b32695SKris Buschelman EXTERN_C_BEGIN 2126a23d5eceSKris Buschelman #undef __FUNCT__ 21274a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2128dfbe8321SBarry Smith PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2129273d9f13SBarry Smith { 2130273d9f13SBarry Smith Mat_MPIBAIJ *b; 2131dfbe8321SBarry Smith PetscErrorCode ierr; 2132273d9f13SBarry Smith PetscTruth flg; 2133273d9f13SBarry Smith 2134273d9f13SBarry Smith PetscFunctionBegin; 213582502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 213682502324SSatish Balay B->data = (void*)b; 213782502324SSatish Balay 2138273d9f13SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2139273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2140273d9f13SBarry Smith B->mapping = 0; 2141273d9f13SBarry Smith B->factor = 0; 2142273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2143273d9f13SBarry Smith 2144273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2145273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2146273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2147273d9f13SBarry Smith 2148273d9f13SBarry Smith /* build local table of row and column ownerships */ 214982502324SSatish Balay ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2150b0a32e0cSBarry Smith PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2151273d9f13SBarry Smith b->cowners = b->rowners + b->size + 2; 2152273d9f13SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2153273d9f13SBarry Smith 2154273d9f13SBarry Smith /* build cache for off array entries formed */ 2155273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2156273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2157273d9f13SBarry Smith b->colmap = PETSC_NULL; 2158273d9f13SBarry Smith b->garray = PETSC_NULL; 2159273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2160273d9f13SBarry Smith 2161cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2162273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 216364a35ccbSBarry Smith b->setvalueslen = 0; 2164273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2165273d9f13SBarry Smith #endif 2166273d9f13SBarry Smith 2167273d9f13SBarry Smith /* stuff used in block assembly */ 2168273d9f13SBarry Smith b->barray = 0; 2169273d9f13SBarry Smith 2170273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2171273d9f13SBarry Smith b->lvec = 0; 2172273d9f13SBarry Smith b->Mvctx = 0; 2173273d9f13SBarry Smith 2174273d9f13SBarry Smith /* stuff for MatGetRow() */ 2175273d9f13SBarry Smith b->rowindices = 0; 2176273d9f13SBarry Smith b->rowvalues = 0; 2177273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2178273d9f13SBarry Smith 2179273d9f13SBarry Smith /* hash table stuff */ 2180273d9f13SBarry Smith b->ht = 0; 2181273d9f13SBarry Smith b->hd = 0; 2182273d9f13SBarry Smith b->ht_size = 0; 2183273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2184273d9f13SBarry Smith b->ht_fact = 0; 2185273d9f13SBarry Smith b->ht_total_ct = 0; 2186273d9f13SBarry Smith b->ht_insert_ct = 0; 2187273d9f13SBarry Smith 2188b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2189273d9f13SBarry Smith if (flg) { 2190f6275e2eSBarry Smith PetscReal fact = 1.39; 2191273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 219287828ca2SBarry Smith ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2193273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2194273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2195b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2196273d9f13SBarry Smith } 2197273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2198273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2199273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2200273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2201273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2202273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2203273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2204273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2205273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2206a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2207a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2208a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 220992b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 221092b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 221192b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 22125bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 22135bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 22145bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2215273d9f13SBarry Smith PetscFunctionReturn(0); 2216273d9f13SBarry Smith } 2217273d9f13SBarry Smith EXTERN_C_END 2218273d9f13SBarry Smith 2219209238afSKris Buschelman /*MC 2220002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2221209238afSKris Buschelman 2222209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2223209238afSKris Buschelman and MATMPIBAIJ otherwise. 2224209238afSKris Buschelman 2225209238afSKris Buschelman Options Database Keys: 2226209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2227209238afSKris Buschelman 2228209238afSKris Buschelman Level: beginner 2229209238afSKris Buschelman 2230209238afSKris Buschelman .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ 2231209238afSKris Buschelman M*/ 2232209238afSKris Buschelman 2233209238afSKris Buschelman EXTERN_C_BEGIN 2234209238afSKris Buschelman #undef __FUNCT__ 2235209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ" 2236dfbe8321SBarry Smith PetscErrorCode MatCreate_BAIJ(Mat A) 2237dfbe8321SBarry Smith { 22386849ba73SBarry Smith PetscErrorCode ierr; 22396849ba73SBarry Smith int size; 2240209238afSKris Buschelman 2241209238afSKris Buschelman PetscFunctionBegin; 2242209238afSKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2243209238afSKris Buschelman ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2244209238afSKris Buschelman if (size == 1) { 2245209238afSKris Buschelman ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2246209238afSKris Buschelman } else { 2247209238afSKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2248209238afSKris Buschelman } 2249209238afSKris Buschelman PetscFunctionReturn(0); 2250209238afSKris Buschelman } 2251209238afSKris Buschelman EXTERN_C_END 2252209238afSKris Buschelman 22534a2ae208SSatish Balay #undef __FUNCT__ 22544a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2255273d9f13SBarry Smith /*@C 2256273d9f13SBarry Smith MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2257273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2258273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2259273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2260273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2261273d9f13SBarry Smith 2262273d9f13SBarry Smith Collective on Mat 2263273d9f13SBarry Smith 2264273d9f13SBarry Smith Input Parameters: 2265273d9f13SBarry Smith + A - the matrix 2266273d9f13SBarry Smith . bs - size of blockk 2267273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2268273d9f13SBarry Smith submatrix (same for all local rows) 2269273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2270273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2271273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2272273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2273273d9f13SBarry Smith submatrix (same for all local rows). 2274273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2275273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2276273d9f13SBarry Smith each block row) or PETSC_NULL. 2277273d9f13SBarry Smith 2278273d9f13SBarry Smith Output Parameter: 2279273d9f13SBarry Smith 2280273d9f13SBarry Smith 2281273d9f13SBarry Smith Options Database Keys: 2282273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2283273d9f13SBarry Smith block calculations (much slower) 2284273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2285273d9f13SBarry Smith 2286273d9f13SBarry Smith Notes: 2287273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2288273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2289273d9f13SBarry Smith 2290273d9f13SBarry Smith Storage Information: 2291273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2292273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2293273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2294273d9f13SBarry Smith local matrix (a rectangular submatrix). 2295273d9f13SBarry Smith 2296273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2297273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2298273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2299273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2300273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2301273d9f13SBarry Smith 2302273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2303273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2304273d9f13SBarry Smith 2305273d9f13SBarry Smith .vb 2306273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2307273d9f13SBarry Smith ------------------- 2308273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2309273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2310273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2311273d9f13SBarry Smith ------------------- 2312273d9f13SBarry Smith .ve 2313273d9f13SBarry Smith 2314273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2315273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2316273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2317273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2318273d9f13SBarry Smith 2319273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2320273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2321273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2322273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2323273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2324273d9f13SBarry Smith matrices. 2325273d9f13SBarry Smith 2326273d9f13SBarry Smith Level: intermediate 2327273d9f13SBarry Smith 2328273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2329273d9f13SBarry Smith 2330273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2331273d9f13SBarry Smith @*/ 2332dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2333273d9f13SBarry Smith { 2334dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2335273d9f13SBarry Smith 2336273d9f13SBarry Smith PetscFunctionBegin; 2337a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2338a23d5eceSKris Buschelman if (f) { 2339a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2340273d9f13SBarry Smith } 2341273d9f13SBarry Smith PetscFunctionReturn(0); 2342273d9f13SBarry Smith } 2343273d9f13SBarry Smith 23444a2ae208SSatish Balay #undef __FUNCT__ 23454a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 234679bdfe76SSatish Balay /*@C 234779bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 234879bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 234979bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 235079bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 235179bdfe76SSatish Balay performance can be increased by more than a factor of 50. 235279bdfe76SSatish Balay 2353db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2354db81eaa0SLois Curfman McInnes 235579bdfe76SSatish Balay Input Parameters: 2356db81eaa0SLois Curfman McInnes + comm - MPI communicator 235779bdfe76SSatish Balay . bs - size of blockk 235879bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 235992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 236092e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 236192e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 236292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 236392e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2364be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2365be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 236647a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 236779bdfe76SSatish Balay submatrix (same for all local rows) 236847a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 236992e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2370db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 237147a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 237279bdfe76SSatish Balay submatrix (same for all local rows). 237347a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 237492e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 237592e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 237679bdfe76SSatish Balay 237779bdfe76SSatish Balay Output Parameter: 237879bdfe76SSatish Balay . A - the matrix 237979bdfe76SSatish Balay 2380db81eaa0SLois Curfman McInnes Options Database Keys: 2381db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2382db81eaa0SLois Curfman McInnes block calculations (much slower) 2383db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 23843ffaccefSLois Curfman McInnes 2385b259b22eSLois Curfman McInnes Notes: 238647a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 238747a75d0bSBarry Smith 238879bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 238979bdfe76SSatish Balay (possibly both). 239079bdfe76SSatish Balay 2391be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2392be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2393be79a94dSBarry Smith 239479bdfe76SSatish Balay Storage Information: 239579bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 239679bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 239779bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 239879bdfe76SSatish Balay local matrix (a rectangular submatrix). 239979bdfe76SSatish Balay 240079bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 240179bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 240279bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 240379bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 240479bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 240579bdfe76SSatish Balay 240679bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 240779bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 240879bdfe76SSatish Balay 2409db81eaa0SLois Curfman McInnes .vb 2410db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2411db81eaa0SLois Curfman McInnes ------------------- 2412db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2413db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2414db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2415db81eaa0SLois Curfman McInnes ------------------- 2416db81eaa0SLois Curfman McInnes .ve 241779bdfe76SSatish Balay 241879bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 241979bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 242079bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 242157b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 242279bdfe76SSatish Balay 2423d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2424d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 242579bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 242692e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 242792e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 24286da5968aSLois Curfman McInnes matrices. 242979bdfe76SSatish Balay 2430027ccd11SLois Curfman McInnes Level: intermediate 2431027ccd11SLois Curfman McInnes 243292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 243379bdfe76SSatish Balay 24343eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 243579bdfe76SSatish Balay @*/ 2436dfbe8321SBarry Smith PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 243779bdfe76SSatish Balay { 24386849ba73SBarry Smith PetscErrorCode ierr; 24396849ba73SBarry Smith int size; 244079bdfe76SSatish Balay 2441d64ed03dSBarry Smith PetscFunctionBegin; 2442273d9f13SBarry Smith ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2443d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2444273d9f13SBarry Smith if (size > 1) { 2445273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2446273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2447273d9f13SBarry Smith } else { 2448273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2449273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 24503914022bSBarry Smith } 24513a40ed3dSBarry Smith PetscFunctionReturn(0); 245279bdfe76SSatish Balay } 2453026e39d0SSatish Balay 24544a2ae208SSatish Balay #undef __FUNCT__ 24554a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 24566849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 24570ac07820SSatish Balay { 24580ac07820SSatish Balay Mat mat; 24590ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2460dfbe8321SBarry Smith PetscErrorCode ierr; 2461dfbe8321SBarry Smith int len=0; 24620ac07820SSatish Balay 2463d64ed03dSBarry Smith PetscFunctionBegin; 24640ac07820SSatish Balay *newmat = 0; 2465273d9f13SBarry Smith ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2466be5d1d56SKris Buschelman ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 24671d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 24687fff6886SHong Zhang 24694beb1cfeSHong Zhang mat->factor = matin->factor; 2470273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 24710ac07820SSatish Balay mat->assembled = PETSC_TRUE; 24727fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 24737fff6886SHong Zhang 2474273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 24750ac07820SSatish Balay a->bs = oldmat->bs; 24760ac07820SSatish Balay a->bs2 = oldmat->bs2; 24770ac07820SSatish Balay a->mbs = oldmat->mbs; 24780ac07820SSatish Balay a->nbs = oldmat->nbs; 24790ac07820SSatish Balay a->Mbs = oldmat->Mbs; 24800ac07820SSatish Balay a->Nbs = oldmat->Nbs; 24810ac07820SSatish Balay 24820ac07820SSatish Balay a->rstart = oldmat->rstart; 24830ac07820SSatish Balay a->rend = oldmat->rend; 24840ac07820SSatish Balay a->cstart = oldmat->cstart; 24850ac07820SSatish Balay a->cend = oldmat->cend; 24860ac07820SSatish Balay a->size = oldmat->size; 24870ac07820SSatish Balay a->rank = oldmat->rank; 2488aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2489aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2490aef5e8e0SSatish Balay a->rowindices = 0; 24910ac07820SSatish Balay a->rowvalues = 0; 24920ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 249330793edcSSatish Balay a->barray = 0; 24943123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 24953123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 24963123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 24973123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 24980ac07820SSatish Balay 2499133cdb44SSatish Balay /* hash table stuff */ 2500133cdb44SSatish Balay a->ht = 0; 2501133cdb44SSatish Balay a->hd = 0; 2502133cdb44SSatish Balay a->ht_size = 0; 2503133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 250425fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2505133cdb44SSatish Balay a->ht_total_ct = 0; 2506133cdb44SSatish Balay a->ht_insert_ct = 0; 2507133cdb44SSatish Balay 2508549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 25098798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 25108798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 25110ac07820SSatish Balay if (oldmat->colmap) { 2512aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 25130f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 251448e59246SSatish Balay #else 251582502324SSatish Balay ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2516b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2517549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 251848e59246SSatish Balay #endif 25190ac07820SSatish Balay } else a->colmap = 0; 25204beb1cfeSHong Zhang 25210ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 252282502324SSatish Balay ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2523b0a32e0cSBarry Smith PetscLogObjectMemory(mat,len*sizeof(int)); 2524549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 25250ac07820SSatish Balay } else a->garray = 0; 25260ac07820SSatish Balay 25270ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2528b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->lvec); 25290ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2530b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->Mvctx); 25317fff6886SHong Zhang 25322e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2533b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->A); 25342e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2535b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->B); 2536b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 25370ac07820SSatish Balay *newmat = mat; 25384beb1cfeSHong Zhang 25393a40ed3dSBarry Smith PetscFunctionReturn(0); 25400ac07820SSatish Balay } 254157b952d6SSatish Balay 2542e090d566SSatish Balay #include "petscsys.h" 254357b952d6SSatish Balay 25444a2ae208SSatish Balay #undef __FUNCT__ 25454a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2546dfbe8321SBarry Smith PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 254757b952d6SSatish Balay { 254857b952d6SSatish Balay Mat A; 25496849ba73SBarry Smith PetscErrorCode ierr; 25506849ba73SBarry Smith int i,nz,j,rstart,rend,fd; 255187828ca2SBarry Smith PetscScalar *vals,*buf; 255257b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 255357b952d6SSatish Balay MPI_Status status; 2554cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 255557b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2556f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 255757b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 255857b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 255957b952d6SSatish Balay 2560d64ed03dSBarry Smith PetscFunctionBegin; 2561b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 256257b952d6SSatish Balay 2563d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2564d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 256557b952d6SSatish Balay if (!rank) { 2566b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2567e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2568552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2569d64ed03dSBarry Smith if (header[3] < 0) { 257029bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2571d64ed03dSBarry Smith } 25726c5fab8fSBarry Smith } 2573d64ed03dSBarry Smith 2574ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 257557b952d6SSatish Balay M = header[1]; N = header[2]; 257657b952d6SSatish Balay 257729bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 257857b952d6SSatish Balay 257957b952d6SSatish Balay /* 258057b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 258157b952d6SSatish Balay divisible by the blocksize 258257b952d6SSatish Balay */ 258357b952d6SSatish Balay Mbs = M/bs; 258457b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 258557b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 258657b952d6SSatish Balay else Mbs++; 258757b952d6SSatish Balay if (extra_rows &&!rank) { 2588b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 258957b952d6SSatish Balay } 2590537820f0SBarry Smith 259157b952d6SSatish Balay /* determine ownership of all rows */ 259257b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 259357b952d6SSatish Balay m = mbs*bs; 2594b0a32e0cSBarry Smith ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2595cee3aa6bSSatish Balay browners = rowners + size + 1; 2596ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 259757b952d6SSatish Balay rowners[0] = 0; 2598cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2599cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 260057b952d6SSatish Balay rstart = rowners[rank]; 260157b952d6SSatish Balay rend = rowners[rank+1]; 260257b952d6SSatish Balay 260357b952d6SSatish Balay /* distribute row lengths to all processors */ 260482502324SSatish Balay ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 260557b952d6SSatish Balay if (!rank) { 2606b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2607e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 260857b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 260982502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2610cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2611ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2612606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2613d64ed03dSBarry Smith } else { 2614ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 261557b952d6SSatish Balay } 261657b952d6SSatish Balay 261757b952d6SSatish Balay if (!rank) { 261857b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 261982502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2620549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 262157b952d6SSatish Balay for (i=0; i<size; i++) { 262257b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 262357b952d6SSatish Balay procsnz[i] += rowlengths[j]; 262457b952d6SSatish Balay } 262557b952d6SSatish Balay } 2626606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 262757b952d6SSatish Balay 262857b952d6SSatish Balay /* determine max buffer needed and allocate it */ 262957b952d6SSatish Balay maxnz = 0; 263057b952d6SSatish Balay for (i=0; i<size; i++) { 263157b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 263257b952d6SSatish Balay } 263382502324SSatish Balay ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 263457b952d6SSatish Balay 263557b952d6SSatish Balay /* read in my part of the matrix column indices */ 263657b952d6SSatish Balay nz = procsnz[0]; 263782502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 263857b952d6SSatish Balay mycols = ibuf; 2639cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2640e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2641cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2642cee3aa6bSSatish Balay 264357b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 264457b952d6SSatish Balay for (i=1; i<size-1; i++) { 264557b952d6SSatish Balay nz = procsnz[i]; 2646e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2647ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 264857b952d6SSatish Balay } 264957b952d6SSatish Balay /* read in the stuff for the last proc */ 265057b952d6SSatish Balay if (size != 1) { 265157b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2652e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 265357b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2654ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 265557b952d6SSatish Balay } 2656606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2657d64ed03dSBarry Smith } else { 265857b952d6SSatish Balay /* determine buffer space needed for message */ 265957b952d6SSatish Balay nz = 0; 266057b952d6SSatish Balay for (i=0; i<m; i++) { 266157b952d6SSatish Balay nz += locrowlens[i]; 266257b952d6SSatish Balay } 266382502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 266457b952d6SSatish Balay mycols = ibuf; 266557b952d6SSatish Balay /* receive message of column indices*/ 2666ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2667ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 266829bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 266957b952d6SSatish Balay } 267057b952d6SSatish Balay 267157b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 267282502324SSatish Balay ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2673cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 267482502324SSatish Balay ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2675549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 267657b952d6SSatish Balay masked1 = mask + Mbs; 267757b952d6SSatish Balay masked2 = masked1 + Mbs; 267857b952d6SSatish Balay rowcount = 0; nzcount = 0; 267957b952d6SSatish Balay for (i=0; i<mbs; i++) { 268057b952d6SSatish Balay dcount = 0; 268157b952d6SSatish Balay odcount = 0; 268257b952d6SSatish Balay for (j=0; j<bs; j++) { 268357b952d6SSatish Balay kmax = locrowlens[rowcount]; 268457b952d6SSatish Balay for (k=0; k<kmax; k++) { 268557b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 268657b952d6SSatish Balay if (!mask[tmp]) { 268757b952d6SSatish Balay mask[tmp] = 1; 268857b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 268957b952d6SSatish Balay else masked1[dcount++] = tmp; 269057b952d6SSatish Balay } 269157b952d6SSatish Balay } 269257b952d6SSatish Balay rowcount++; 269357b952d6SSatish Balay } 2694cee3aa6bSSatish Balay 269557b952d6SSatish Balay dlens[i] = dcount; 269657b952d6SSatish Balay odlens[i] = odcount; 2697cee3aa6bSSatish Balay 269857b952d6SSatish Balay /* zero out the mask elements we set */ 269957b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 270057b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 270157b952d6SSatish Balay } 2702cee3aa6bSSatish Balay 270357b952d6SSatish Balay /* create our matrix */ 270478ae41b4SKris Buschelman ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 270578ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 270678ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 270778ae41b4SKris Buschelman 270878ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 27096d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 271057b952d6SSatish Balay 271157b952d6SSatish Balay if (!rank) { 271287828ca2SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 271357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 271457b952d6SSatish Balay nz = procsnz[0]; 271557b952d6SSatish Balay vals = buf; 2716cee3aa6bSSatish Balay mycols = ibuf; 2717cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2718e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2719cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2720537820f0SBarry Smith 272157b952d6SSatish Balay /* insert into matrix */ 272257b952d6SSatish Balay jj = rstart*bs; 272357b952d6SSatish Balay for (i=0; i<m; i++) { 2724b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 272557b952d6SSatish Balay mycols += locrowlens[i]; 272657b952d6SSatish Balay vals += locrowlens[i]; 272757b952d6SSatish Balay jj++; 272857b952d6SSatish Balay } 272957b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 273057b952d6SSatish Balay for (i=1; i<size-1; i++) { 273157b952d6SSatish Balay nz = procsnz[i]; 273257b952d6SSatish Balay vals = buf; 2733e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2734ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 273557b952d6SSatish Balay } 273657b952d6SSatish Balay /* the last proc */ 273757b952d6SSatish Balay if (size != 1){ 273857b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2739cee3aa6bSSatish Balay vals = buf; 2740e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 274157b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2742ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 274357b952d6SSatish Balay } 2744606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2745d64ed03dSBarry Smith } else { 274657b952d6SSatish Balay /* receive numeric values */ 274787828ca2SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 274857b952d6SSatish Balay 274957b952d6SSatish Balay /* receive message of values*/ 275057b952d6SSatish Balay vals = buf; 2751cee3aa6bSSatish Balay mycols = ibuf; 2752ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2753ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 275429bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 275557b952d6SSatish Balay 275657b952d6SSatish Balay /* insert into matrix */ 275757b952d6SSatish Balay jj = rstart*bs; 2758cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2759b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 276057b952d6SSatish Balay mycols += locrowlens[i]; 276157b952d6SSatish Balay vals += locrowlens[i]; 276257b952d6SSatish Balay jj++; 276357b952d6SSatish Balay } 276457b952d6SSatish Balay } 2765606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2766606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2767606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2768606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2769606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2770606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 27716d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27726d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 277378ae41b4SKris Buschelman 277478ae41b4SKris Buschelman *newmat = A; 27753a40ed3dSBarry Smith PetscFunctionReturn(0); 277657b952d6SSatish Balay } 277757b952d6SSatish Balay 27784a2ae208SSatish Balay #undef __FUNCT__ 27794a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2780133cdb44SSatish Balay /*@ 2781133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2782133cdb44SSatish Balay 2783133cdb44SSatish Balay Input Parameters: 2784133cdb44SSatish Balay . mat - the matrix 2785133cdb44SSatish Balay . fact - factor 2786133cdb44SSatish Balay 2787fee21e36SBarry Smith Collective on Mat 2788fee21e36SBarry Smith 27898c890885SBarry Smith Level: advanced 27908c890885SBarry Smith 2791133cdb44SSatish Balay Notes: 2792133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2793133cdb44SSatish Balay 2794133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2795133cdb44SSatish Balay 2796133cdb44SSatish Balay .seealso: MatSetOption() 2797133cdb44SSatish Balay @*/ 2798dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2799133cdb44SSatish Balay { 2800dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscReal); 28015bf65638SKris Buschelman 28025bf65638SKris Buschelman PetscFunctionBegin; 28035bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 28045bf65638SKris Buschelman if (f) { 28055bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 28065bf65638SKris Buschelman } 28075bf65638SKris Buschelman PetscFunctionReturn(0); 28085bf65638SKris Buschelman } 28095bf65638SKris Buschelman 28105bf65638SKris Buschelman #undef __FUNCT__ 28115bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2812dfbe8321SBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 28135bf65638SKris Buschelman { 281425fdafccSSatish Balay Mat_MPIBAIJ *baij; 2815133cdb44SSatish Balay 2816133cdb44SSatish Balay PetscFunctionBegin; 2817133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2818133cdb44SSatish Balay baij->ht_fact = fact; 2819133cdb44SSatish Balay PetscFunctionReturn(0); 2820133cdb44SSatish Balay } 2821f2a5309cSSatish Balay 28224a2ae208SSatish Balay #undef __FUNCT__ 28234a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2824dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2825f2a5309cSSatish Balay { 2826f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2827f2a5309cSSatish Balay PetscFunctionBegin; 2828f2a5309cSSatish Balay *Ad = a->A; 2829f2a5309cSSatish Balay *Ao = a->B; 2830195d93cdSBarry Smith *colmap = a->garray; 2831f2a5309cSSatish Balay PetscFunctionReturn(0); 2832f2a5309cSSatish Balay } 2833