1bba1ac68SSatish Balay /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/ 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 479bdfe76SSatish Balay 5ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 6ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat); 7268466fbSBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 8268466fbSBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 9f15d580aSBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 10f15d580aSBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 11f15d580aSBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 12268466fbSBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 13268466fbSBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 14ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat); 15268466fbSBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 1693fea6afSBarry Smith 1793fea6afSBarry Smith /* UGLY, ugly, ugly 1887828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 1993fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2093fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2193fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2293fea6afSBarry Smith into the single precision data structures. 2393fea6afSBarry Smith */ 2493fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2534232ad1SSatish Balay EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2634232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2734232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2834232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2934232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 3093fea6afSBarry Smith #else 316fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3293fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3393fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 346fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 356fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3693fea6afSBarry Smith #endif 373b2fbd54SBarry Smith 384a2ae208SSatish Balay #undef __FUNCT__ 394a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 407843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 417843d17aSBarry Smith { 427843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 437843d17aSBarry Smith int ierr,i; 4487828ca2SBarry Smith PetscScalar *va,*vb; 457843d17aSBarry Smith Vec vtmp; 467843d17aSBarry Smith 477843d17aSBarry Smith PetscFunctionBegin; 487843d17aSBarry Smith 497843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50b1d4fb26SBarry Smith ierr = VecGetArrayFast(v,&va);CHKERRQ(ierr); 517843d17aSBarry Smith 52ac355199SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 537843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54b1d4fb26SBarry Smith ierr = VecGetArrayFast(vtmp,&vb);CHKERRQ(ierr); 557843d17aSBarry Smith 56273d9f13SBarry Smith for (i=0; i<A->m; i++){ 57273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 587843d17aSBarry Smith } 597843d17aSBarry Smith 60b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(v,&va);CHKERRQ(ierr); 61b1d4fb26SBarry Smith ierr = VecRestoreArrayFast(vtmp,&vb);CHKERRQ(ierr); 62ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 637843d17aSBarry Smith 647843d17aSBarry Smith PetscFunctionReturn(0); 657843d17aSBarry Smith } 667843d17aSBarry Smith 677fc3c18eSBarry Smith EXTERN_C_BEGIN 684a2ae208SSatish Balay #undef __FUNCT__ 694a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 707fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat) 717fc3c18eSBarry Smith { 727fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 737fc3c18eSBarry Smith int ierr; 747fc3c18eSBarry Smith 757fc3c18eSBarry Smith PetscFunctionBegin; 767fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 787fc3c18eSBarry Smith PetscFunctionReturn(0); 797fc3c18eSBarry Smith } 807fc3c18eSBarry Smith EXTERN_C_END 817fc3c18eSBarry Smith 827fc3c18eSBarry Smith EXTERN_C_BEGIN 834a2ae208SSatish Balay #undef __FUNCT__ 844a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 857fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat) 867fc3c18eSBarry Smith { 877fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 887fc3c18eSBarry Smith int ierr; 897fc3c18eSBarry Smith 907fc3c18eSBarry Smith PetscFunctionBegin; 917fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 937fc3c18eSBarry Smith PetscFunctionReturn(0); 947fc3c18eSBarry Smith } 957fc3c18eSBarry Smith EXTERN_C_END 967fc3c18eSBarry Smith 97537820f0SBarry Smith /* 98537820f0SBarry Smith Local utility routine that creates a mapping from the global column 9957b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10057b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10157b952d6SSatish Balay length of colmap equals the global matrix length. 10257b952d6SSatish Balay */ 1034a2ae208SSatish Balay #undef __FUNCT__ 1044a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105653e4784SBarry Smith int CreateColmap_MPIBAIJ_Private(Mat mat) 10657b952d6SSatish Balay { 10757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10857b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109dc2900e9SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 11057b952d6SSatish Balay 111d64ed03dSBarry Smith PetscFunctionBegin; 112aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 113f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11448e59246SSatish Balay for (i=0; i<nbs; i++){ 1150f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11648e59246SSatish Balay } 11748e59246SSatish Balay #else 11882502324SSatish Balay ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 119b0a32e0cSBarry Smith PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 120549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 121928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12248e59246SSatish Balay #endif 1233a40ed3dSBarry Smith PetscFunctionReturn(0); 12457b952d6SSatish Balay } 12557b952d6SSatish Balay 12680c1aa95SSatish Balay #define CHUNKSIZE 10 12780c1aa95SSatish Balay 128f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 12980c1aa95SSatish Balay { \ 13080c1aa95SSatish Balay \ 13180c1aa95SSatish Balay brow = row/bs; \ 13280c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 133ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13480c1aa95SSatish Balay bcol = col/bs; \ 13580c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 136ab26458aSBarry Smith low = 0; high = nrow; \ 137ab26458aSBarry Smith while (high-low > 3) { \ 138ab26458aSBarry Smith t = (low+high)/2; \ 139ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 140ab26458aSBarry Smith else low = t; \ 141ab26458aSBarry Smith } \ 142ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14380c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14480c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14580c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 146eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 147eada6651SSatish Balay else *bap = value; \ 148ac7a638eSSatish Balay goto a_noinsert; \ 14980c1aa95SSatish Balay } \ 15080c1aa95SSatish Balay } \ 15189280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 152a45adfd6SMatthew Knepley else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 15380c1aa95SSatish Balay if (nrow >= rmax) { \ 15480c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15580c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1563eda8832SBarry Smith MatScalar *new_a; \ 15780c1aa95SSatish Balay \ 158a45adfd6SMatthew Knepley if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 15989280ab3SLois Curfman McInnes \ 16080c1aa95SSatish Balay /* malloc new storage space */ \ 1613eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 16282502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 16380c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 16480c1aa95SSatish Balay new_i = new_j + new_nz; \ 16580c1aa95SSatish Balay \ 16680c1aa95SSatish Balay /* copy over old data into new slots */ \ 16780c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 16880c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 169549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 17080c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1713eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1723eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 17387828ca2SBarry Smith ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 174549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1753eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 17680c1aa95SSatish Balay /* free up old matrix storage */ \ 177606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 178606d414cSSatish Balay if (!a->singlemalloc) { \ 179606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 180606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 181606d414cSSatish Balay } \ 18280c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1837c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 18480c1aa95SSatish Balay \ 18580c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 186ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 187b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 18880c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 18980c1aa95SSatish Balay a->reallocs++; \ 19080c1aa95SSatish Balay a->nz++; \ 19180c1aa95SSatish Balay } \ 19280c1aa95SSatish Balay N = nrow++ - 1; \ 19380c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 19480c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 19580c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1963eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 19780c1aa95SSatish Balay } \ 1983eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 19980c1aa95SSatish Balay rp[_i] = bcol; \ 20080c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 201ac7a638eSSatish Balay a_noinsert:; \ 20280c1aa95SSatish Balay ailen[brow] = nrow; \ 20380c1aa95SSatish Balay } 20457b952d6SSatish Balay 205ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 206ac7a638eSSatish Balay { \ 207ac7a638eSSatish Balay brow = row/bs; \ 208ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 209ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 210ac7a638eSSatish Balay bcol = col/bs; \ 211ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 212ac7a638eSSatish Balay low = 0; high = nrow; \ 213ac7a638eSSatish Balay while (high-low > 3) { \ 214ac7a638eSSatish Balay t = (low+high)/2; \ 215ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 216ac7a638eSSatish Balay else low = t; \ 217ac7a638eSSatish Balay } \ 218ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 219ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 220ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 221ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 222ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 223ac7a638eSSatish Balay else *bap = value; \ 224ac7a638eSSatish Balay goto b_noinsert; \ 225ac7a638eSSatish Balay } \ 226ac7a638eSSatish Balay } \ 22789280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 228a45adfd6SMatthew Knepley else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 229ac7a638eSSatish Balay if (nrow >= rmax) { \ 230ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 23174c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2323eda8832SBarry Smith MatScalar *new_a; \ 233ac7a638eSSatish Balay \ 234a45adfd6SMatthew Knepley if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 23589280ab3SLois Curfman McInnes \ 236ac7a638eSSatish Balay /* malloc new storage space */ \ 2373eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 23882502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 239ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 240ac7a638eSSatish Balay new_i = new_j + new_nz; \ 241ac7a638eSSatish Balay \ 242ac7a638eSSatish Balay /* copy over old data into new slots */ \ 243ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 24474c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 245549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 246ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2473eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2483eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2493eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 250549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2513eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 252ac7a638eSSatish Balay /* free up old matrix storage */ \ 253606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 254606d414cSSatish Balay if (!b->singlemalloc) { \ 255606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 256606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 257606d414cSSatish Balay } \ 25874c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2597c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 260ac7a638eSSatish Balay \ 261ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 262ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 263b0a32e0cSBarry Smith PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 26474c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 26574c639caSSatish Balay b->reallocs++; \ 26674c639caSSatish Balay b->nz++; \ 267ac7a638eSSatish Balay } \ 268ac7a638eSSatish Balay N = nrow++ - 1; \ 269ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 270ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 271ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2723eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 273ac7a638eSSatish Balay } \ 2743eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 275ac7a638eSSatish Balay rp[_i] = bcol; \ 276ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 277ac7a638eSSatish Balay b_noinsert:; \ 278ac7a638eSSatish Balay bilen[brow] = nrow; \ 279ac7a638eSSatish Balay } 280ac7a638eSSatish Balay 28193fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2824a2ae208SSatish Balay #undef __FUNCT__ 2834a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 284f15d580aSBarry Smith int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 28557b952d6SSatish Balay { 2866fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 28793fea6afSBarry Smith int ierr,i,N = m*n; 2886fa18ffdSBarry Smith MatScalar *vsingle; 28993fea6afSBarry Smith 29093fea6afSBarry Smith PetscFunctionBegin; 2916fa18ffdSBarry Smith if (N > b->setvalueslen) { 2926fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 29382502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2946fa18ffdSBarry Smith b->setvalueslen = N; 2956fa18ffdSBarry Smith } 2966fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2976fa18ffdSBarry Smith 29893fea6afSBarry Smith for (i=0; i<N; i++) { 29993fea6afSBarry Smith vsingle[i] = v[i]; 30093fea6afSBarry Smith } 30193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 30293fea6afSBarry Smith PetscFunctionReturn(0); 30393fea6afSBarry Smith } 30493fea6afSBarry Smith 3054a2ae208SSatish Balay #undef __FUNCT__ 3064a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 307f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 30893fea6afSBarry Smith { 3096fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3106fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 3116fa18ffdSBarry Smith MatScalar *vsingle; 31293fea6afSBarry Smith 31393fea6afSBarry Smith PetscFunctionBegin; 3146fa18ffdSBarry Smith if (N > b->setvalueslen) { 3156fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 31682502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3176fa18ffdSBarry Smith b->setvalueslen = N; 3186fa18ffdSBarry Smith } 3196fa18ffdSBarry Smith vsingle = b->setvaluescopy; 32093fea6afSBarry Smith for (i=0; i<N; i++) { 32193fea6afSBarry Smith vsingle[i] = v[i]; 32293fea6afSBarry Smith } 32393fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 32493fea6afSBarry Smith PetscFunctionReturn(0); 32593fea6afSBarry Smith } 3266fa18ffdSBarry Smith 3274a2ae208SSatish Balay #undef __FUNCT__ 3284a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 329f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 3306fa18ffdSBarry Smith { 3316fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3326fa18ffdSBarry Smith int ierr,i,N = m*n; 3336fa18ffdSBarry Smith MatScalar *vsingle; 3346fa18ffdSBarry Smith 3356fa18ffdSBarry Smith PetscFunctionBegin; 3366fa18ffdSBarry Smith if (N > b->setvalueslen) { 3376fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 33882502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3396fa18ffdSBarry Smith b->setvalueslen = N; 3406fa18ffdSBarry Smith } 3416fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3426fa18ffdSBarry Smith for (i=0; i<N; i++) { 3436fa18ffdSBarry Smith vsingle[i] = v[i]; 3446fa18ffdSBarry Smith } 3456fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3466fa18ffdSBarry Smith PetscFunctionReturn(0); 3476fa18ffdSBarry Smith } 3486fa18ffdSBarry Smith 3494a2ae208SSatish Balay #undef __FUNCT__ 3504a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 351f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 3526fa18ffdSBarry Smith { 3536fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3546fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 3556fa18ffdSBarry Smith MatScalar *vsingle; 3566fa18ffdSBarry Smith 3576fa18ffdSBarry Smith PetscFunctionBegin; 3586fa18ffdSBarry Smith if (N > b->setvalueslen) { 3596fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 36082502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3616fa18ffdSBarry Smith b->setvalueslen = N; 3626fa18ffdSBarry Smith } 3636fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3646fa18ffdSBarry Smith for (i=0; i<N; i++) { 3656fa18ffdSBarry Smith vsingle[i] = v[i]; 3666fa18ffdSBarry Smith } 3676fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3686fa18ffdSBarry Smith PetscFunctionReturn(0); 3696fa18ffdSBarry Smith } 37093fea6afSBarry Smith #endif 37193fea6afSBarry Smith 3724a2ae208SSatish Balay #undef __FUNCT__ 373e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 374f15d580aSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 37593fea6afSBarry Smith { 37657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 37793fea6afSBarry Smith MatScalar value; 378273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 3794fa0d573SSatish Balay int ierr,i,j,row,col; 380273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 3814fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 3824fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 38357b952d6SSatish Balay 384eada6651SSatish Balay /* Some Variables required in the macro */ 38580c1aa95SSatish Balay Mat A = baij->A; 38680c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 387ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3883eda8832SBarry Smith MatScalar *aa=a->a; 389ac7a638eSSatish Balay 390ac7a638eSSatish Balay Mat B = baij->B; 391ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 392ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3933eda8832SBarry Smith MatScalar *ba=b->a; 394ac7a638eSSatish Balay 395ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 396ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 3973eda8832SBarry Smith MatScalar *ap,*bap; 39880c1aa95SSatish Balay 399d64ed03dSBarry Smith PetscFunctionBegin; 40057b952d6SSatish Balay for (i=0; i<m; i++) { 4015ef9f2a5SBarry Smith if (im[i] < 0) continue; 402aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 403590ac198SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 404639f9d9dSBarry Smith #endif 40557b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 40657b952d6SSatish Balay row = im[i] - rstart_orig; 40757b952d6SSatish Balay for (j=0; j<n; j++) { 40857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 40957b952d6SSatish Balay col = in[j] - cstart_orig; 41057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 411f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 41280c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 41373959e64SBarry Smith } else if (in[j] < 0) continue; 414aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 415590ac198SBarry Smith else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);} 416639f9d9dSBarry Smith #endif 41757b952d6SSatish Balay else { 41857b952d6SSatish Balay if (mat->was_assembled) { 419905e6a2fSBarry Smith if (!baij->colmap) { 420905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 421905e6a2fSBarry Smith } 422aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4230f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 424bba1ac68SSatish Balay col = col - 1; 42548e59246SSatish Balay #else 426bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 42748e59246SSatish Balay #endif 42857b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 42957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4308295de27SSatish Balay col = in[j]; 4319bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 4329bf004c3SSatish Balay B = baij->B; 4339bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 4349bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 4359bf004c3SSatish Balay ba=b->a; 436bba1ac68SSatish Balay } else col += in[j]%bs; 4378295de27SSatish Balay } else col = in[j]; 43857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 43990da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 44090da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 44157b952d6SSatish Balay } 44257b952d6SSatish Balay } 443d64ed03dSBarry Smith } else { 44490f02eecSBarry Smith if (!baij->donotstash) { 445ff2fd236SBarry Smith if (roworiented) { 4466fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 447ff2fd236SBarry Smith } else { 4486fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 44957b952d6SSatish Balay } 45057b952d6SSatish Balay } 45157b952d6SSatish Balay } 45290f02eecSBarry Smith } 4533a40ed3dSBarry Smith PetscFunctionReturn(0); 45457b952d6SSatish Balay } 45557b952d6SSatish Balay 4564a2ae208SSatish Balay #undef __FUNCT__ 4574a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 458f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 459ab26458aSBarry Smith { 460ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 461f15d580aSBarry Smith const MatScalar *value; 462f15d580aSBarry Smith MatScalar *barray=baij->barray; 463273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 464273d9f13SBarry Smith int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 465ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 466ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 467ab26458aSBarry Smith 468b16ae2b1SBarry Smith PetscFunctionBegin; 46930793edcSSatish Balay if(!barray) { 47082502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 47182502324SSatish Balay baij->barray = barray; 47230793edcSSatish Balay } 47330793edcSSatish Balay 474ab26458aSBarry Smith if (roworiented) { 475ab26458aSBarry Smith stepval = (n-1)*bs; 476ab26458aSBarry Smith } else { 477ab26458aSBarry Smith stepval = (m-1)*bs; 478ab26458aSBarry Smith } 479ab26458aSBarry Smith for (i=0; i<m; i++) { 4805ef9f2a5SBarry Smith if (im[i] < 0) continue; 481aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 482590ac198SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 483ab26458aSBarry Smith #endif 484ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 485ab26458aSBarry Smith row = im[i] - rstart; 486ab26458aSBarry Smith for (j=0; j<n; j++) { 48715b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 48815b57d14SSatish Balay if ((roworiented) && (n == 1)) { 489f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 49015b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 491f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 49215b57d14SSatish Balay } else { /* Here a copy is required */ 493ab26458aSBarry Smith if (roworiented) { 494ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 495ab26458aSBarry Smith } else { 496ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 497abef11f7SSatish Balay } 49847513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 49947513183SBarry Smith for (jj=0; jj<bs; jj++) { 50030793edcSSatish Balay *barray++ = *value++; 50147513183SBarry Smith } 50247513183SBarry Smith } 50330793edcSSatish Balay barray -=bs2; 50415b57d14SSatish Balay } 505abef11f7SSatish Balay 506abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 507abef11f7SSatish Balay col = in[j] - cstart; 50893fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509ab26458aSBarry Smith } 5105ef9f2a5SBarry Smith else if (in[j] < 0) continue; 511aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 512590ac198SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 513ab26458aSBarry Smith #endif 514ab26458aSBarry Smith else { 515ab26458aSBarry Smith if (mat->was_assembled) { 516ab26458aSBarry Smith if (!baij->colmap) { 517ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 518ab26458aSBarry Smith } 519a5eb4965SSatish Balay 520aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 521aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 522fa46199cSSatish Balay { int data; 5230f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 52429bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 525fa46199cSSatish Balay } 52648e59246SSatish Balay #else 52729bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 528a5eb4965SSatish Balay #endif 52948e59246SSatish Balay #endif 530aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 5310f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532fa46199cSSatish Balay col = (col - 1)/bs; 53348e59246SSatish Balay #else 534a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 53548e59246SSatish Balay #endif 536ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 538ab26458aSBarry Smith col = in[j]; 539ab26458aSBarry Smith } 540ab26458aSBarry Smith } 541ab26458aSBarry Smith else col = in[j]; 54293fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543ab26458aSBarry Smith } 544ab26458aSBarry Smith } 545d64ed03dSBarry Smith } else { 546ab26458aSBarry Smith if (!baij->donotstash) { 547ff2fd236SBarry Smith if (roworiented) { 5486fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549ff2fd236SBarry Smith } else { 5506fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551ff2fd236SBarry Smith } 552abef11f7SSatish Balay } 553ab26458aSBarry Smith } 554ab26458aSBarry Smith } 5553a40ed3dSBarry Smith PetscFunctionReturn(0); 556ab26458aSBarry Smith } 5576fa18ffdSBarry Smith 5580bdbc534SSatish Balay #define HASH_KEY 0.6180339887 559c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 5606fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 5624a2ae208SSatish Balay #undef __FUNCT__ 5634a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 564f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 5650bdbc534SSatish Balay { 5660bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 5680bdbc534SSatish Balay int ierr,i,j,row,col; 569273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 570c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 571c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 572329f5518SBarry Smith PetscReal tmp; 5733eda8832SBarry Smith MatScalar **HD = baij->hd,value; 574aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5754a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5764a15367fSSatish Balay #endif 5770bdbc534SSatish Balay 5780bdbc534SSatish Balay PetscFunctionBegin; 5790bdbc534SSatish Balay 5800bdbc534SSatish Balay for (i=0; i<m; i++) { 581aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 58229bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 583590ac198SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 5840bdbc534SSatish Balay #endif 5850bdbc534SSatish Balay row = im[i]; 586c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5870bdbc534SSatish Balay for (j=0; j<n; j++) { 5880bdbc534SSatish Balay col = in[j]; 5896fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5900bdbc534SSatish Balay /* Look up into the Hash Table */ 591c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 592c2760754SSatish Balay h1 = HASH(size,key,tmp); 5930bdbc534SSatish Balay 594c2760754SSatish Balay 595c2760754SSatish Balay idx = h1; 596aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 597187ce0cbSSatish Balay insert_ct++; 598187ce0cbSSatish Balay total_ct++; 599187ce0cbSSatish Balay if (HT[idx] != key) { 600187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 601187ce0cbSSatish Balay if (idx == size) { 602187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 603187ce0cbSSatish Balay if (idx == h1) { 604a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 605187ce0cbSSatish Balay } 606187ce0cbSSatish Balay } 607187ce0cbSSatish Balay } 608187ce0cbSSatish Balay #else 609c2760754SSatish Balay if (HT[idx] != key) { 610c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 611c2760754SSatish Balay if (idx == size) { 612c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 613c2760754SSatish Balay if (idx == h1) { 614a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 615c2760754SSatish Balay } 616c2760754SSatish Balay } 617c2760754SSatish Balay } 618187ce0cbSSatish Balay #endif 619c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 620c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 621c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 6220bdbc534SSatish Balay } 6230bdbc534SSatish Balay } else { 6240bdbc534SSatish Balay if (!baij->donotstash) { 625ff2fd236SBarry Smith if (roworiented) { 6268798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 627ff2fd236SBarry Smith } else { 6288798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 6290bdbc534SSatish Balay } 6300bdbc534SSatish Balay } 6310bdbc534SSatish Balay } 6320bdbc534SSatish Balay } 633aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 634187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 635187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 636187ce0cbSSatish Balay #endif 6370bdbc534SSatish Balay PetscFunctionReturn(0); 6380bdbc534SSatish Balay } 6390bdbc534SSatish Balay 6404a2ae208SSatish Balay #undef __FUNCT__ 6414a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 642f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 6430bdbc534SSatish Balay { 6440bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 645273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 6468798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 647273d9f13SBarry Smith int rstart=baij->rstart ; 648b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 649c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 650329f5518SBarry Smith PetscReal tmp; 6513eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 652f15d580aSBarry Smith const MatScalar *v_t,*value; 653aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 6544a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 6554a15367fSSatish Balay #endif 6560bdbc534SSatish Balay 657d0a41580SSatish Balay PetscFunctionBegin; 658d0a41580SSatish Balay 6590bdbc534SSatish Balay if (roworiented) { 6600bdbc534SSatish Balay stepval = (n-1)*bs; 6610bdbc534SSatish Balay } else { 6620bdbc534SSatish Balay stepval = (m-1)*bs; 6630bdbc534SSatish Balay } 6640bdbc534SSatish Balay for (i=0; i<m; i++) { 665aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 666590ac198SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]); 667590ac198SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1); 6680bdbc534SSatish Balay #endif 6690bdbc534SSatish Balay row = im[i]; 670187ce0cbSSatish Balay v_t = v + i*bs2; 671c2760754SSatish Balay if (row >= rstart && row < rend) { 6720bdbc534SSatish Balay for (j=0; j<n; j++) { 6730bdbc534SSatish Balay col = in[j]; 6740bdbc534SSatish Balay 6750bdbc534SSatish Balay /* Look up into the Hash Table */ 676c2760754SSatish Balay key = row*Nbs+col+1; 677c2760754SSatish Balay h1 = HASH(size,key,tmp); 6780bdbc534SSatish Balay 679c2760754SSatish Balay idx = h1; 680aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 681187ce0cbSSatish Balay total_ct++; 682187ce0cbSSatish Balay insert_ct++; 683187ce0cbSSatish Balay if (HT[idx] != key) { 684187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 685187ce0cbSSatish Balay if (idx == size) { 686187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 687187ce0cbSSatish Balay if (idx == h1) { 688a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 689187ce0cbSSatish Balay } 690187ce0cbSSatish Balay } 691187ce0cbSSatish Balay } 692187ce0cbSSatish Balay #else 693c2760754SSatish Balay if (HT[idx] != key) { 694c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 695c2760754SSatish Balay if (idx == size) { 696c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 697c2760754SSatish Balay if (idx == h1) { 698a45adfd6SMatthew Knepley SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 699c2760754SSatish Balay } 700c2760754SSatish Balay } 701c2760754SSatish Balay } 702187ce0cbSSatish Balay #endif 703c2760754SSatish Balay baij_a = HD[idx]; 7040bdbc534SSatish Balay if (roworiented) { 705c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 706187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 707187ce0cbSSatish Balay value = v_t; 708187ce0cbSSatish Balay v_t += bs; 709fef45726SSatish Balay if (addv == ADD_VALUES) { 710c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 711c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 712fef45726SSatish Balay baij_a[jj] += *value++; 713b4cc0f5aSSatish Balay } 714b4cc0f5aSSatish Balay } 715fef45726SSatish Balay } else { 716c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 717c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 718fef45726SSatish Balay baij_a[jj] = *value++; 719fef45726SSatish Balay } 720fef45726SSatish Balay } 721fef45726SSatish Balay } 7220bdbc534SSatish Balay } else { 7230bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 724fef45726SSatish Balay if (addv == ADD_VALUES) { 725b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 7260bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 727fef45726SSatish Balay baij_a[jj] += *value++; 728fef45726SSatish Balay } 729fef45726SSatish Balay } 730fef45726SSatish Balay } else { 731fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 732fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 733fef45726SSatish Balay baij_a[jj] = *value++; 734fef45726SSatish Balay } 735b4cc0f5aSSatish Balay } 7360bdbc534SSatish Balay } 7370bdbc534SSatish Balay } 7380bdbc534SSatish Balay } 7390bdbc534SSatish Balay } else { 7400bdbc534SSatish Balay if (!baij->donotstash) { 7410bdbc534SSatish Balay if (roworiented) { 7428798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7430bdbc534SSatish Balay } else { 7448798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7450bdbc534SSatish Balay } 7460bdbc534SSatish Balay } 7470bdbc534SSatish Balay } 7480bdbc534SSatish Balay } 749aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 750187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 751187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 752187ce0cbSSatish Balay #endif 7530bdbc534SSatish Balay PetscFunctionReturn(0); 7540bdbc534SSatish Balay } 755133cdb44SSatish Balay 7564a2ae208SSatish Balay #undef __FUNCT__ 7574a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 758f15d580aSBarry Smith int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 759d6de1c52SSatish Balay { 760d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 761d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 76248e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 763d6de1c52SSatish Balay 764133cdb44SSatish Balay PetscFunctionBegin; 765d6de1c52SSatish Balay for (i=0; i<m; i++) { 766590ac198SBarry Smith if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 767590ac198SBarry Smith if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 768d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 769d6de1c52SSatish Balay row = idxm[i] - bsrstart; 770d6de1c52SSatish Balay for (j=0; j<n; j++) { 771590ac198SBarry Smith if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 772590ac198SBarry Smith if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 773d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 774d6de1c52SSatish Balay col = idxn[j] - bscstart; 77598dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776d64ed03dSBarry Smith } else { 777905e6a2fSBarry Smith if (!baij->colmap) { 778905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 779905e6a2fSBarry Smith } 780aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7810f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 782fa46199cSSatish Balay data --; 78348e59246SSatish Balay #else 78448e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 78548e59246SSatish Balay #endif 78648e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 787d9d09a02SSatish Balay else { 78848e59246SSatish Balay col = data + idxn[j]%bs; 78998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 790d6de1c52SSatish Balay } 791d6de1c52SSatish Balay } 792d6de1c52SSatish Balay } 793d64ed03dSBarry Smith } else { 79429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 795d6de1c52SSatish Balay } 796d6de1c52SSatish Balay } 7973a40ed3dSBarry Smith PetscFunctionReturn(0); 798d6de1c52SSatish Balay } 799d6de1c52SSatish Balay 8004a2ae208SSatish Balay #undef __FUNCT__ 8014a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 802064f8208SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 803d6de1c52SSatish Balay { 804d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 805d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 806acdf5bf4SSatish Balay int ierr,i,bs2=baij->bs2; 807329f5518SBarry Smith PetscReal sum = 0.0; 8083eda8832SBarry Smith MatScalar *v; 809d6de1c52SSatish Balay 810d64ed03dSBarry Smith PetscFunctionBegin; 811d6de1c52SSatish Balay if (baij->size == 1) { 812064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 813d6de1c52SSatish Balay } else { 814d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 815d6de1c52SSatish Balay v = amat->a; 816d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 817aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 818329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 819d6de1c52SSatish Balay #else 820d6de1c52SSatish Balay sum += (*v)*(*v); v++; 821d6de1c52SSatish Balay #endif 822d6de1c52SSatish Balay } 823d6de1c52SSatish Balay v = bmat->a; 824d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 825aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 826329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 827d6de1c52SSatish Balay #else 828d6de1c52SSatish Balay sum += (*v)*(*v); v++; 829d6de1c52SSatish Balay #endif 830d6de1c52SSatish Balay } 831064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 832064f8208SBarry Smith *nrm = sqrt(*nrm); 833d64ed03dSBarry Smith } else { 83429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 835d6de1c52SSatish Balay } 836d64ed03dSBarry Smith } 8373a40ed3dSBarry Smith PetscFunctionReturn(0); 838d6de1c52SSatish Balay } 83957b952d6SSatish Balay 840bd7f49f5SSatish Balay 841fef45726SSatish Balay /* 842fef45726SSatish Balay Creates the hash table, and sets the table 843fef45726SSatish Balay This table is created only once. 844fef45726SSatish Balay If new entried need to be added to the matrix 845fef45726SSatish Balay then the hash table has to be destroyed and 846fef45726SSatish Balay recreated. 847fef45726SSatish Balay */ 8484a2ae208SSatish Balay #undef __FUNCT__ 8494a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851596b8d2eSBarry Smith { 852596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 854596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 8550bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856549d3d68SSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 857187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858fef45726SSatish Balay int *HT,key; 8593eda8832SBarry Smith MatScalar **HD; 860329f5518SBarry Smith PetscReal tmp; 861aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 8624a15367fSSatish Balay int ct=0,max=0; 8634a15367fSSatish Balay #endif 864fef45726SSatish Balay 865d64ed03dSBarry Smith PetscFunctionBegin; 8660bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8670bdbc534SSatish Balay size = baij->ht_size; 868fef45726SSatish Balay 8690bdbc534SSatish Balay if (baij->ht) { 8700bdbc534SSatish Balay PetscFunctionReturn(0); 871596b8d2eSBarry Smith } 8720bdbc534SSatish Balay 873fef45726SSatish Balay /* Allocate Memory for Hash Table */ 874b0a32e0cSBarry Smith ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 876b9e4cc15SSatish Balay HD = baij->hd; 877a07cd24cSSatish Balay HT = baij->ht; 878b9e4cc15SSatish Balay 879b9e4cc15SSatish Balay 88087828ca2SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8810bdbc534SSatish Balay 882596b8d2eSBarry Smith 883596b8d2eSBarry Smith /* Loop Over A */ 8840bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 885596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8860bdbc534SSatish Balay row = i+rstart; 8870bdbc534SSatish Balay col = aj[j]+cstart; 888596b8d2eSBarry Smith 889187ce0cbSSatish Balay key = row*Nbs + col + 1; 890c2760754SSatish Balay h1 = HASH(size,key,tmp); 8910bdbc534SSatish Balay for (k=0; k<size; k++){ 8920bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8930bdbc534SSatish Balay HT[(h1+k)%size] = key; 8940bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 895596b8d2eSBarry Smith break; 896aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 897187ce0cbSSatish Balay } else { 898187ce0cbSSatish Balay ct++; 899187ce0cbSSatish Balay #endif 900596b8d2eSBarry Smith } 901187ce0cbSSatish Balay } 902aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 903187ce0cbSSatish Balay if (k> max) max = k; 904187ce0cbSSatish Balay #endif 905596b8d2eSBarry Smith } 906596b8d2eSBarry Smith } 907596b8d2eSBarry Smith /* Loop Over B */ 9080bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 909596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9100bdbc534SSatish Balay row = i+rstart; 9110bdbc534SSatish Balay col = garray[bj[j]]; 912187ce0cbSSatish Balay key = row*Nbs + col + 1; 913c2760754SSatish Balay h1 = HASH(size,key,tmp); 9140bdbc534SSatish Balay for (k=0; k<size; k++){ 9150bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 9160bdbc534SSatish Balay HT[(h1+k)%size] = key; 9170bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 918596b8d2eSBarry Smith break; 919aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 920187ce0cbSSatish Balay } else { 921187ce0cbSSatish Balay ct++; 922187ce0cbSSatish Balay #endif 923596b8d2eSBarry Smith } 924187ce0cbSSatish Balay } 925aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 926187ce0cbSSatish Balay if (k> max) max = k; 927187ce0cbSSatish Balay #endif 928596b8d2eSBarry Smith } 929596b8d2eSBarry Smith } 930596b8d2eSBarry Smith 931596b8d2eSBarry Smith /* Print Summary */ 932aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 933c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 934596b8d2eSBarry Smith if (HT[i]) {j++;} 935c38d4ed2SBarry Smith } 936b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 937187ce0cbSSatish Balay #endif 9383a40ed3dSBarry Smith PetscFunctionReturn(0); 939596b8d2eSBarry Smith } 94057b952d6SSatish Balay 9414a2ae208SSatish Balay #undef __FUNCT__ 9424a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 943bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944bbb85fb3SSatish Balay { 945bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946ff2fd236SBarry Smith int ierr,nstash,reallocs; 947bbb85fb3SSatish Balay InsertMode addv; 948bbb85fb3SSatish Balay 949bbb85fb3SSatish Balay PetscFunctionBegin; 950bbb85fb3SSatish Balay if (baij->donotstash) { 951bbb85fb3SSatish Balay PetscFunctionReturn(0); 952bbb85fb3SSatish Balay } 953bbb85fb3SSatish Balay 954bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 955bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 956bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 95729bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 958bbb85fb3SSatish Balay } 959bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 960bbb85fb3SSatish Balay 9618798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9628798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9638798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 964b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 96546680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 966b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 967bbb85fb3SSatish Balay PetscFunctionReturn(0); 968bbb85fb3SSatish Balay } 969bbb85fb3SSatish Balay 9704a2ae208SSatish Balay #undef __FUNCT__ 9714a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 972bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 973bbb85fb3SSatish Balay { 974bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 975a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 976a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 9777c922b88SBarry Smith int *row,*col,other_disassembled; 9787c922b88SBarry Smith PetscTruth r1,r2,r3; 9793eda8832SBarry Smith MatScalar *val; 980bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 981bbb85fb3SSatish Balay 982bbb85fb3SSatish Balay PetscFunctionBegin; 983bbb85fb3SSatish Balay if (!baij->donotstash) { 984a2d1c673SSatish Balay while (1) { 9858798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986a2d1c673SSatish Balay if (!flg) break; 987a2d1c673SSatish Balay 988bbb85fb3SSatish Balay for (i=0; i<n;) { 989bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 990bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 991bbb85fb3SSatish Balay if (j < n) ncols = j-i; 992bbb85fb3SSatish Balay else ncols = n-i; 993bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 99493fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 995bbb85fb3SSatish Balay i = j; 996bbb85fb3SSatish Balay } 997bbb85fb3SSatish Balay } 9988798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 999a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1000a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1001a2d1c673SSatish Balay restore the original flags */ 1002a2d1c673SSatish Balay r1 = baij->roworiented; 1003a2d1c673SSatish Balay r2 = a->roworiented; 1004a2d1c673SSatish Balay r3 = b->roworiented; 10057c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10067c922b88SBarry Smith a->roworiented = PETSC_FALSE; 10077c922b88SBarry Smith b->roworiented = PETSC_FALSE; 1008a2d1c673SSatish Balay while (1) { 10098798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1010a2d1c673SSatish Balay if (!flg) break; 1011a2d1c673SSatish Balay 1012a2d1c673SSatish Balay for (i=0; i<n;) { 1013a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1014a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1015a2d1c673SSatish Balay if (j < n) ncols = j-i; 1016a2d1c673SSatish Balay else ncols = n-i; 101793fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1018a2d1c673SSatish Balay i = j; 1019a2d1c673SSatish Balay } 1020a2d1c673SSatish Balay } 10218798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1022a2d1c673SSatish Balay baij->roworiented = r1; 1023a2d1c673SSatish Balay a->roworiented = r2; 1024a2d1c673SSatish Balay b->roworiented = r3; 1025bbb85fb3SSatish Balay } 1026bbb85fb3SSatish Balay 1027bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1028bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1029bbb85fb3SSatish Balay 1030bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1031bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1032bbb85fb3SSatish Balay /* 1033bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1034bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1035bbb85fb3SSatish Balay */ 1036bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1037bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1038bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1039bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1040bbb85fb3SSatish Balay } 1041bbb85fb3SSatish Balay } 1042bbb85fb3SSatish Balay 1043bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1044bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1045bbb85fb3SSatish Balay } 1046bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1047bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1048bbb85fb3SSatish Balay 1049aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1050bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1051f6275e2eSBarry Smith PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1052bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1053bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1054bbb85fb3SSatish Balay } 1055bbb85fb3SSatish Balay #endif 1056bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1057bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1058bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1059bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1060bbb85fb3SSatish Balay } 1061bbb85fb3SSatish Balay 1062606d414cSSatish Balay if (baij->rowvalues) { 1063606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1064606d414cSSatish Balay baij->rowvalues = 0; 1065606d414cSSatish Balay } 1066bbb85fb3SSatish Balay PetscFunctionReturn(0); 1067bbb85fb3SSatish Balay } 106857b952d6SSatish Balay 10694a2ae208SSatish Balay #undef __FUNCT__ 10704a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1071b0a32e0cSBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 107257b952d6SSatish Balay { 107357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1074fb9695e5SSatish Balay int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 10756831982aSBarry Smith PetscTruth isascii,isdraw; 1076b0a32e0cSBarry Smith PetscViewer sviewer; 1077f3ef73ceSBarry Smith PetscViewerFormat format; 107857b952d6SSatish Balay 1079d64ed03dSBarry Smith PetscFunctionBegin; 1080f81bd7ccSHong Zhang /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1081b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1082fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 10830f5bd95cSBarry Smith if (isascii) { 1084b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1085456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 10864e220ebcSLois Curfman McInnes MatInfo info; 1087d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1088d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1089b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1090273d9f13SBarry Smith rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10916831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 1092d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1093b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1094d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1095b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1096b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 109757b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10983a40ed3dSBarry Smith PetscFunctionReturn(0); 1099fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1100b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 11013a40ed3dSBarry Smith PetscFunctionReturn(0); 110204929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 110304929863SHong Zhang PetscFunctionReturn(0); 110457b952d6SSatish Balay } 110557b952d6SSatish Balay } 110657b952d6SSatish Balay 11070f5bd95cSBarry Smith if (isdraw) { 1108b0a32e0cSBarry Smith PetscDraw draw; 110957b952d6SSatish Balay PetscTruth isnull; 1110b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 111257b952d6SSatish Balay } 111357b952d6SSatish Balay 111457b952d6SSatish Balay if (size == 1) { 1115873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 111657b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1117d64ed03dSBarry Smith } else { 111857b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 111957b952d6SSatish Balay Mat A; 112057b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1121273d9f13SBarry Smith int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11223eda8832SBarry Smith MatScalar *a; 112357b952d6SSatish Balay 1124*f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1125*f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 112657b952d6SSatish Balay if (!rank) { 1127*f204ca49SKris Buschelman ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1128d64ed03dSBarry Smith } else { 1129*f204ca49SKris Buschelman ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 113057b952d6SSatish Balay } 1131*f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1132*f204ca49SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1133b0a32e0cSBarry Smith PetscLogObjectParent(mat,A); 113457b952d6SSatish Balay 113557b952d6SSatish Balay /* copy over the A part */ 113657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 113757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 113882502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 113957b952d6SSatish Balay 114057b952d6SSatish Balay for (i=0; i<mbs; i++) { 114157b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 114257b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 114357b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 114457b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 114557b952d6SSatish Balay for (k=0; k<bs; k++) { 114693fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1147cee3aa6bSSatish Balay col++; a += bs; 114857b952d6SSatish Balay } 114957b952d6SSatish Balay } 115057b952d6SSatish Balay } 115157b952d6SSatish Balay /* copy over the B part */ 115257b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 115357b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 115457b952d6SSatish Balay for (i=0; i<mbs; i++) { 115557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 115657b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 115757b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 115857b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 115957b952d6SSatish Balay for (k=0; k<bs; k++) { 116093fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1161cee3aa6bSSatish Balay col++; a += bs; 116257b952d6SSatish Balay } 116357b952d6SSatish Balay } 116457b952d6SSatish Balay } 1165606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11666d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11676d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 116855843e3eSBarry Smith /* 116955843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1170b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 117155843e3eSBarry Smith */ 1172b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1173f14a1c24SBarry Smith if (!rank) { 1174e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11756831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 117657b952d6SSatish Balay } 1177b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 117857b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 117957b952d6SSatish Balay } 11803a40ed3dSBarry Smith PetscFunctionReturn(0); 118157b952d6SSatish Balay } 118257b952d6SSatish Balay 11834a2ae208SSatish Balay #undef __FUNCT__ 11844a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1185b0a32e0cSBarry Smith int MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 118657b952d6SSatish Balay { 118757b952d6SSatish Balay int ierr; 11886831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 118957b952d6SSatish Balay 1190d64ed03dSBarry Smith PetscFunctionBegin; 1191b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1192fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1193b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1194fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 11950f5bd95cSBarry Smith if (isascii || isdraw || issocket || isbinary) { 11967b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 11975cd90555SBarry Smith } else { 119829bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 119957b952d6SSatish Balay } 12003a40ed3dSBarry Smith PetscFunctionReturn(0); 120157b952d6SSatish Balay } 120257b952d6SSatish Balay 12034a2ae208SSatish Balay #undef __FUNCT__ 12044a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1205e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 120679bdfe76SSatish Balay { 120779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 120879bdfe76SSatish Balay int ierr; 120979bdfe76SSatish Balay 1210d64ed03dSBarry Smith PetscFunctionBegin; 1211aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1212b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 121379bdfe76SSatish Balay #endif 12148798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12158798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1216606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 121779bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 121879bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1219aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12200f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 122148e59246SSatish Balay #else 1222606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 122348e59246SSatish Balay #endif 1224606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1225606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1226606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1227606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1228606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1229606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 12306fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 12316fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 12326fa18ffdSBarry Smith #endif 1233606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 12343a40ed3dSBarry Smith PetscFunctionReturn(0); 123579bdfe76SSatish Balay } 123679bdfe76SSatish Balay 12374a2ae208SSatish Balay #undef __FUNCT__ 12384a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1239ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1240cee3aa6bSSatish Balay { 1241cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 124247b4a8eaSLois Curfman McInnes int ierr,nt; 1243cee3aa6bSSatish Balay 1244d64ed03dSBarry Smith PetscFunctionBegin; 1245e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1246273d9f13SBarry Smith if (nt != A->n) { 124729bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 124847b4a8eaSLois Curfman McInnes } 1249e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1250273d9f13SBarry Smith if (nt != A->m) { 125129bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 125247b4a8eaSLois Curfman McInnes } 125343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1254f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 125543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1256f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 125743a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12583a40ed3dSBarry Smith PetscFunctionReturn(0); 1259cee3aa6bSSatish Balay } 1260cee3aa6bSSatish Balay 12614a2ae208SSatish Balay #undef __FUNCT__ 12624a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1263ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1264cee3aa6bSSatish Balay { 1265cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1266cee3aa6bSSatish Balay int ierr; 1267d64ed03dSBarry Smith 1268d64ed03dSBarry Smith PetscFunctionBegin; 126943a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1270f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 127143a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1272f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12733a40ed3dSBarry Smith PetscFunctionReturn(0); 1274cee3aa6bSSatish Balay } 1275cee3aa6bSSatish Balay 12764a2ae208SSatish Balay #undef __FUNCT__ 12774a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 12787c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1279cee3aa6bSSatish Balay { 1280cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1281cee3aa6bSSatish Balay int ierr; 1282cee3aa6bSSatish Balay 1283d64ed03dSBarry Smith PetscFunctionBegin; 1284cee3aa6bSSatish Balay /* do nondiagonal part */ 12857c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1286cee3aa6bSSatish Balay /* send it on its way */ 1287537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1288cee3aa6bSSatish Balay /* do local part */ 12897c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1290cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1291cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1292cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1293639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12943a40ed3dSBarry Smith PetscFunctionReturn(0); 1295cee3aa6bSSatish Balay } 1296cee3aa6bSSatish Balay 12974a2ae208SSatish Balay #undef __FUNCT__ 12984a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 12997c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1300cee3aa6bSSatish Balay { 1301cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1302cee3aa6bSSatish Balay int ierr; 1303cee3aa6bSSatish Balay 1304d64ed03dSBarry Smith PetscFunctionBegin; 1305cee3aa6bSSatish Balay /* do nondiagonal part */ 13067c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1307cee3aa6bSSatish Balay /* send it on its way */ 1308537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1309cee3aa6bSSatish Balay /* do local part */ 13107c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1311cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1312cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1313cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1314537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13153a40ed3dSBarry Smith PetscFunctionReturn(0); 1316cee3aa6bSSatish Balay } 1317cee3aa6bSSatish Balay 1318cee3aa6bSSatish Balay /* 1319cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1320cee3aa6bSSatish Balay diagonal block 1321cee3aa6bSSatish Balay */ 13224a2ae208SSatish Balay #undef __FUNCT__ 13234a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1324ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1325cee3aa6bSSatish Balay { 1326cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 13273a40ed3dSBarry Smith int ierr; 1328d64ed03dSBarry Smith 1329d64ed03dSBarry Smith PetscFunctionBegin; 1330273d9f13SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13313a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13323a40ed3dSBarry Smith PetscFunctionReturn(0); 1333cee3aa6bSSatish Balay } 1334cee3aa6bSSatish Balay 13354a2ae208SSatish Balay #undef __FUNCT__ 13364a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1337268466fbSBarry Smith int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1338cee3aa6bSSatish Balay { 1339cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1340cee3aa6bSSatish Balay int ierr; 1341d64ed03dSBarry Smith 1342d64ed03dSBarry Smith PetscFunctionBegin; 1343cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1344cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 13453a40ed3dSBarry Smith PetscFunctionReturn(0); 1346cee3aa6bSSatish Balay } 1347026e39d0SSatish Balay 13484a2ae208SSatish Balay #undef __FUNCT__ 13494a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 135087828ca2SBarry Smith int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1351acdf5bf4SSatish Balay { 1352acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 135387828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1354acdf5bf4SSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1355d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1356d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1357acdf5bf4SSatish Balay 1358d64ed03dSBarry Smith PetscFunctionBegin; 135929bbc08cSBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1360acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1361acdf5bf4SSatish Balay 1362acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1363acdf5bf4SSatish Balay /* 1364acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1365acdf5bf4SSatish Balay */ 1366acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1367bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1368bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1369acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1370acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1371acdf5bf4SSatish Balay } 137287828ca2SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1373acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1374acdf5bf4SSatish Balay } 1375acdf5bf4SSatish Balay 137629bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1377d9d09a02SSatish Balay lrow = row - brstart; 1378acdf5bf4SSatish Balay 1379acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1380acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1381acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1382f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1383f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1384acdf5bf4SSatish Balay nztot = nzA + nzB; 1385acdf5bf4SSatish Balay 1386acdf5bf4SSatish Balay cmap = mat->garray; 1387acdf5bf4SSatish Balay if (v || idx) { 1388acdf5bf4SSatish Balay if (nztot) { 1389acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1390acdf5bf4SSatish Balay int imark = -1; 1391acdf5bf4SSatish Balay if (v) { 1392acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1393acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1394d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1395acdf5bf4SSatish Balay else break; 1396acdf5bf4SSatish Balay } 1397acdf5bf4SSatish Balay imark = i; 1398acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1399acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1400acdf5bf4SSatish Balay } 1401acdf5bf4SSatish Balay if (idx) { 1402acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1403acdf5bf4SSatish Balay if (imark > -1) { 1404acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1405bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1406acdf5bf4SSatish Balay } 1407acdf5bf4SSatish Balay } else { 1408acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1409d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1410d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1411acdf5bf4SSatish Balay else break; 1412acdf5bf4SSatish Balay } 1413acdf5bf4SSatish Balay imark = i; 1414acdf5bf4SSatish Balay } 1415d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1416d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1417acdf5bf4SSatish Balay } 1418d64ed03dSBarry Smith } else { 1419d212a18eSSatish Balay if (idx) *idx = 0; 1420d212a18eSSatish Balay if (v) *v = 0; 1421d212a18eSSatish Balay } 1422acdf5bf4SSatish Balay } 1423acdf5bf4SSatish Balay *nz = nztot; 1424f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1425f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14263a40ed3dSBarry Smith PetscFunctionReturn(0); 1427acdf5bf4SSatish Balay } 1428acdf5bf4SSatish Balay 14294a2ae208SSatish Balay #undef __FUNCT__ 14304a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 143187828ca2SBarry Smith int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1432acdf5bf4SSatish Balay { 1433acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1434d64ed03dSBarry Smith 1435d64ed03dSBarry Smith PetscFunctionBegin; 1436acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 143729bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1438acdf5bf4SSatish Balay } 1439acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14403a40ed3dSBarry Smith PetscFunctionReturn(0); 1441acdf5bf4SSatish Balay } 1442acdf5bf4SSatish Balay 14434a2ae208SSatish Balay #undef __FUNCT__ 14444a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1445ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14465a838052SSatish Balay { 14475a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1448d64ed03dSBarry Smith 1449d64ed03dSBarry Smith PetscFunctionBegin; 14505a838052SSatish Balay *bs = baij->bs; 14513a40ed3dSBarry Smith PetscFunctionReturn(0); 14525a838052SSatish Balay } 14535a838052SSatish Balay 14544a2ae208SSatish Balay #undef __FUNCT__ 14554a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1456ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 145758667388SSatish Balay { 145858667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 145958667388SSatish Balay int ierr; 1460d64ed03dSBarry Smith 1461d64ed03dSBarry Smith PetscFunctionBegin; 146258667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 146358667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14643a40ed3dSBarry Smith PetscFunctionReturn(0); 146558667388SSatish Balay } 14660ac07820SSatish Balay 14674a2ae208SSatish Balay #undef __FUNCT__ 14684a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1469ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14700ac07820SSatish Balay { 14714e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14724e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 14737d57db60SLois Curfman McInnes int ierr; 1474329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14750ac07820SSatish Balay 1476d64ed03dSBarry Smith PetscFunctionBegin; 1477f6275e2eSBarry Smith info->block_size = (PetscReal)a->bs; 14784e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14790e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1480de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14814e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14820e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1483de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14840ac07820SSatish Balay if (flag == MAT_LOCAL) { 14854e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14864e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14874e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14884e220ebcSLois Curfman McInnes info->memory = isend[3]; 14894e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14900ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1491d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 14924e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14934e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14944e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14954e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14964e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14970ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1498d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 14994e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15004e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15014e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15024e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15034e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1504d41123aaSBarry Smith } else { 150529bbc08cSBarry Smith SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 15060ac07820SSatish Balay } 1507f6275e2eSBarry Smith info->rows_global = (PetscReal)A->M; 1508f6275e2eSBarry Smith info->columns_global = (PetscReal)A->N; 1509f6275e2eSBarry Smith info->rows_local = (PetscReal)A->m; 1510f6275e2eSBarry Smith info->columns_local = (PetscReal)A->N; 15114e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15124e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15134e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15143a40ed3dSBarry Smith PetscFunctionReturn(0); 15150ac07820SSatish Balay } 15160ac07820SSatish Balay 15174a2ae208SSatish Balay #undef __FUNCT__ 15184a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1519ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 152058667388SSatish Balay { 152158667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 152298305bb5SBarry Smith int ierr; 152358667388SSatish Balay 1524d64ed03dSBarry Smith PetscFunctionBegin; 152512c028f9SKris Buschelman switch (op) { 152612c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 152712c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 152812c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 152912c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 153012c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 153112c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 153212c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 153398305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 153498305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 153512c028f9SKris Buschelman break; 153612c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15377c922b88SBarry Smith a->roworiented = PETSC_TRUE; 153898305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 153998305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 154012c028f9SKris Buschelman break; 154112c028f9SKris Buschelman case MAT_ROWS_SORTED: 154212c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 154312c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 1544b0a32e0cSBarry Smith PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 154512c028f9SKris Buschelman break; 154612c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15477c922b88SBarry Smith a->roworiented = PETSC_FALSE; 154898305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154998305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155012c028f9SKris Buschelman break; 155112c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15527c922b88SBarry Smith a->donotstash = PETSC_TRUE; 155312c028f9SKris Buschelman break; 155412c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 155529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 155612c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15577c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 155812c028f9SKris Buschelman break; 155977e54ba9SKris Buschelman case MAT_SYMMETRIC: 156077e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15619a4540c5SBarry Smith case MAT_NOT_SYMMETRIC: 15629a4540c5SBarry Smith case MAT_NOT_STRUCTURALLY_SYMMETRIC: 15639a4540c5SBarry Smith case MAT_HERMITIAN: 15649a4540c5SBarry Smith case MAT_NOT_HERMITIAN: 15659a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 15669a4540c5SBarry Smith case MAT_NOT_SYMMETRY_ETERNAL: 156777e54ba9SKris Buschelman break; 156812c028f9SKris Buschelman default: 156929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 1570d64ed03dSBarry Smith } 15713a40ed3dSBarry Smith PetscFunctionReturn(0); 157258667388SSatish Balay } 157358667388SSatish Balay 15744a2ae208SSatish Balay #undef __FUNCT__ 15754a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1576ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15770ac07820SSatish Balay { 15780ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15790ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15800ac07820SSatish Balay Mat B; 1581273d9f13SBarry Smith int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 15820ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15833eda8832SBarry Smith MatScalar *a; 15840ac07820SSatish Balay 1585d64ed03dSBarry Smith PetscFunctionBegin; 158629bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1587*f204ca49SKris Buschelman ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1588*f204ca49SKris Buschelman ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1589*f204ca49SKris Buschelman ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 15900ac07820SSatish Balay 15910ac07820SSatish Balay /* copy over the A part */ 15920ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15930ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 159482502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 15950ac07820SSatish Balay 15960ac07820SSatish Balay for (i=0; i<mbs; i++) { 15970ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15980ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15990ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16000ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 16010ac07820SSatish Balay for (k=0; k<bs; k++) { 160293fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16030ac07820SSatish Balay col++; a += bs; 16040ac07820SSatish Balay } 16050ac07820SSatish Balay } 16060ac07820SSatish Balay } 16070ac07820SSatish Balay /* copy over the B part */ 16080ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16090ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16100ac07820SSatish Balay for (i=0; i<mbs; i++) { 16110ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16120ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16130ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16140ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16150ac07820SSatish Balay for (k=0; k<bs; k++) { 161693fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16170ac07820SSatish Balay col++; a += bs; 16180ac07820SSatish Balay } 16190ac07820SSatish Balay } 16200ac07820SSatish Balay } 1621606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16220ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16230ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16240ac07820SSatish Balay 16257c922b88SBarry Smith if (matout) { 16260ac07820SSatish Balay *matout = B; 16270ac07820SSatish Balay } else { 1628273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16290ac07820SSatish Balay } 16303a40ed3dSBarry Smith PetscFunctionReturn(0); 16310ac07820SSatish Balay } 16320e95ebc0SSatish Balay 16334a2ae208SSatish Balay #undef __FUNCT__ 16344a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 163536c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16360e95ebc0SSatish Balay { 163736c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 163836c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 16390e95ebc0SSatish Balay int ierr,s1,s2,s3; 16400e95ebc0SSatish Balay 1641d64ed03dSBarry Smith PetscFunctionBegin; 164236c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 164336c4a09eSSatish Balay if (rr) { 164436c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 164529bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 164636c4a09eSSatish Balay /* Overlap communication with computation. */ 164736c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 164836c4a09eSSatish Balay } 16490e95ebc0SSatish Balay if (ll) { 16500e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 165129bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1652a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16530e95ebc0SSatish Balay } 165436c4a09eSSatish Balay /* scale the diagonal block */ 165536c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 165636c4a09eSSatish Balay 165736c4a09eSSatish Balay if (rr) { 165836c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 165936c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1660a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 166136c4a09eSSatish Balay } 166236c4a09eSSatish Balay 16633a40ed3dSBarry Smith PetscFunctionReturn(0); 16640e95ebc0SSatish Balay } 16650e95ebc0SSatish Balay 16664a2ae208SSatish Balay #undef __FUNCT__ 16674a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1668268466fbSBarry Smith int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 16690ac07820SSatish Balay { 16700ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16710ac07820SSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1672c1dc657dSBarry Smith int *nprocs,j,idx,nsends,row; 16730ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16740ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1675a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16760ac07820SSatish Balay MPI_Comm comm = A->comm; 16770ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16780ac07820SSatish Balay MPI_Status recv_status,*send_status; 16790ac07820SSatish Balay IS istmp; 168035d8aa7fSBarry Smith PetscTruth found; 16810ac07820SSatish Balay 1682d64ed03dSBarry Smith PetscFunctionBegin; 1683f14a1c24SBarry Smith ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 16840ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 16850ac07820SSatish Balay 16860ac07820SSatish Balay /* first count number of contributors to each processor */ 168782502324SSatish Balay ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1688549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1689b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 16900ac07820SSatish Balay for (i=0; i<N; i++) { 16910ac07820SSatish Balay idx = rows[i]; 169235d8aa7fSBarry Smith found = PETSC_FALSE; 16930ac07820SSatish Balay for (j=0; j<size; j++) { 16940ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1695c1dc657dSBarry Smith nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 16960ac07820SSatish Balay } 16970ac07820SSatish Balay } 169829bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 16990ac07820SSatish Balay } 1700c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 17010ac07820SSatish Balay 17020ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1703c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 17040ac07820SSatish Balay 17050ac07820SSatish Balay /* post receives: */ 1706b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1707b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17080ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1709ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17100ac07820SSatish Balay } 17110ac07820SSatish Balay 17120ac07820SSatish Balay /* do sends: 17130ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17140ac07820SSatish Balay the ith processor 17150ac07820SSatish Balay */ 1716b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1717b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1718b0a32e0cSBarry Smith ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 17190ac07820SSatish Balay starts[0] = 0; 1720c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17210ac07820SSatish Balay for (i=0; i<N; i++) { 17220ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17230ac07820SSatish Balay } 17246831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17250ac07820SSatish Balay 17260ac07820SSatish Balay starts[0] = 0; 1727c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17280ac07820SSatish Balay count = 0; 17290ac07820SSatish Balay for (i=0; i<size; i++) { 1730c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1731c1dc657dSBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17320ac07820SSatish Balay } 17330ac07820SSatish Balay } 1734606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17350ac07820SSatish Balay 17360ac07820SSatish Balay base = owners[rank]*bs; 17370ac07820SSatish Balay 17380ac07820SSatish Balay /* wait on receives */ 1739b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 17400ac07820SSatish Balay source = lens + nrecvs; 17410ac07820SSatish Balay count = nrecvs; slen = 0; 17420ac07820SSatish Balay while (count) { 1743ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17440ac07820SSatish Balay /* unpack receives into our local space */ 1745ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17460ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17470ac07820SSatish Balay lens[imdex] = n; 17480ac07820SSatish Balay slen += n; 17490ac07820SSatish Balay count--; 17500ac07820SSatish Balay } 1751606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17520ac07820SSatish Balay 17530ac07820SSatish Balay /* move the data into the send scatter */ 1754b0a32e0cSBarry Smith ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 17550ac07820SSatish Balay count = 0; 17560ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17570ac07820SSatish Balay values = rvalues + i*nmax; 17580ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17590ac07820SSatish Balay lrows[count++] = values[j] - base; 17600ac07820SSatish Balay } 17610ac07820SSatish Balay } 1762606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1763606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1764606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1765606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17660ac07820SSatish Balay 17670ac07820SSatish Balay /* actually zap the local rows */ 1768029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1769b0a32e0cSBarry Smith PetscLogObjectParent(A,istmp); 1770a07cd24cSSatish Balay 177172dacd9aSBarry Smith /* 177272dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 177372dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 177472dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 177572dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 177672dacd9aSBarry Smith 177772dacd9aSBarry Smith Contributed by: Mathew Knepley 177872dacd9aSBarry Smith */ 17799c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 17806fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 17819c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 17826fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 17839c957beeSSatish Balay } else if (diag) { 17846fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1785fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 178629bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1787fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 17886525c446SSatish Balay } 1789a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1790a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 17913f11ea53SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1792a07cd24cSSatish Balay } 1793a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17959c957beeSSatish Balay } else { 17966fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1797a07cd24cSSatish Balay } 17989c957beeSSatish Balay 17999c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1800606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1801a07cd24cSSatish Balay 18020ac07820SSatish Balay /* wait on sends */ 18030ac07820SSatish Balay if (nsends) { 180482502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1805ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1806606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18070ac07820SSatish Balay } 1808606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1809606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18100ac07820SSatish Balay 18113a40ed3dSBarry Smith PetscFunctionReturn(0); 18120ac07820SSatish Balay } 181372dacd9aSBarry Smith 18144a2ae208SSatish Balay #undef __FUNCT__ 18154a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1816ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1817ba4ca20aSSatish Balay { 1818ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 181925fdafccSSatish Balay MPI_Comm comm = A->comm; 1820133cdb44SSatish Balay static int called = 0; 18213a40ed3dSBarry Smith int ierr; 1822ba4ca20aSSatish Balay 1823d64ed03dSBarry Smith PetscFunctionBegin; 18243a40ed3dSBarry Smith if (!a->rank) { 18253a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 182625fdafccSSatish Balay } 182725fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1828d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1829d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18303a40ed3dSBarry Smith PetscFunctionReturn(0); 1831ba4ca20aSSatish Balay } 18320ac07820SSatish Balay 18334a2ae208SSatish Balay #undef __FUNCT__ 18344a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1835ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1836bb5a7306SBarry Smith { 1837bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1838bb5a7306SBarry Smith int ierr; 1839d64ed03dSBarry Smith 1840d64ed03dSBarry Smith PetscFunctionBegin; 1841bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18423a40ed3dSBarry Smith PetscFunctionReturn(0); 1843bb5a7306SBarry Smith } 1844bb5a7306SBarry Smith 18452e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18460ac07820SSatish Balay 18474a2ae208SSatish Balay #undef __FUNCT__ 18484a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 18497fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18507fc3c18eSBarry Smith { 18517fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18527fc3c18eSBarry Smith Mat a,b,c,d; 18537fc3c18eSBarry Smith PetscTruth flg; 18547fc3c18eSBarry Smith int ierr; 18557fc3c18eSBarry Smith 18567fc3c18eSBarry Smith PetscFunctionBegin; 18577fc3c18eSBarry Smith a = matA->A; b = matA->B; 18587fc3c18eSBarry Smith c = matB->A; d = matB->B; 18597fc3c18eSBarry Smith 18607fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18617fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18627fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18637fc3c18eSBarry Smith } 18647fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18657fc3c18eSBarry Smith PetscFunctionReturn(0); 18667fc3c18eSBarry Smith } 18677fc3c18eSBarry Smith 1868273d9f13SBarry Smith 18694a2ae208SSatish Balay #undef __FUNCT__ 18704a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1871273d9f13SBarry Smith int MatSetUpPreallocation_MPIBAIJ(Mat A) 1872273d9f13SBarry Smith { 1873273d9f13SBarry Smith int ierr; 1874273d9f13SBarry Smith 1875273d9f13SBarry Smith PetscFunctionBegin; 1876273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1877273d9f13SBarry Smith PetscFunctionReturn(0); 1878273d9f13SBarry Smith } 1879273d9f13SBarry Smith 188079bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1881cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1882cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1883cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1884cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1885cc2dc46cSBarry Smith MatMult_MPIBAIJ, 188697304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 18877c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 18887c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1889cc2dc46cSBarry Smith 0, 1890cc2dc46cSBarry Smith 0, 1891cc2dc46cSBarry Smith 0, 189297304618SKris Buschelman /*10*/ 0, 1893cc2dc46cSBarry Smith 0, 1894cc2dc46cSBarry Smith 0, 1895cc2dc46cSBarry Smith 0, 1896cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 189797304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 18987fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1899cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1900cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1901cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 190297304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 1903cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1904cc2dc46cSBarry Smith 0, 1905cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1906cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 190797304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ, 1908cc2dc46cSBarry Smith 0, 1909cc2dc46cSBarry Smith 0, 1910cc2dc46cSBarry Smith 0, 1911cc2dc46cSBarry Smith 0, 191297304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ, 1913273d9f13SBarry Smith 0, 1914cc2dc46cSBarry Smith 0, 1915cc2dc46cSBarry Smith 0, 1916cc2dc46cSBarry Smith 0, 191797304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ, 1918cc2dc46cSBarry Smith 0, 1919cc2dc46cSBarry Smith 0, 1920cc2dc46cSBarry Smith 0, 1921cc2dc46cSBarry Smith 0, 192297304618SKris Buschelman /*40*/ 0, 1923cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1924cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1925cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1926cc2dc46cSBarry Smith 0, 192797304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ, 1928cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1929cc2dc46cSBarry Smith 0, 1930cc2dc46cSBarry Smith 0, 1931cc2dc46cSBarry Smith 0, 193297304618SKris Buschelman /*50*/ MatGetBlockSize_MPIBAIJ, 1933cc2dc46cSBarry Smith 0, 1934cc2dc46cSBarry Smith 0, 1935cc2dc46cSBarry Smith 0, 1936cc2dc46cSBarry Smith 0, 193797304618SKris Buschelman /*55*/ 0, 1938cc2dc46cSBarry Smith 0, 1939cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1940cc2dc46cSBarry Smith 0, 1941cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 194297304618SKris Buschelman /*60*/ 0, 1943f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 1944f14a1c24SBarry Smith MatView_MPIBAIJ, 19458a124369SBarry Smith MatGetPetscMaps_Petsc, 19467843d17aSBarry Smith 0, 194797304618SKris Buschelman /*65*/ 0, 19487843d17aSBarry Smith 0, 19497843d17aSBarry Smith 0, 19507843d17aSBarry Smith 0, 19517843d17aSBarry Smith 0, 195297304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ, 19537843d17aSBarry Smith 0, 195497304618SKris Buschelman 0, 195597304618SKris Buschelman 0, 195697304618SKris Buschelman 0, 195797304618SKris Buschelman /*75*/ 0, 195897304618SKris Buschelman 0, 195997304618SKris Buschelman 0, 196097304618SKris Buschelman 0, 196197304618SKris Buschelman 0, 196297304618SKris Buschelman /*80*/ 0, 196397304618SKris Buschelman 0, 196497304618SKris Buschelman 0, 196597304618SKris Buschelman 0, 196697304618SKris Buschelman /*85*/ MatLoad_MPIBAIJ 196797304618SKris Buschelman }; 196879bdfe76SSatish Balay 19695ef9f2a5SBarry Smith 1970e18c124aSSatish Balay EXTERN_C_BEGIN 19714a2ae208SSatish Balay #undef __FUNCT__ 19724a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 19735ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19745ef9f2a5SBarry Smith { 19755ef9f2a5SBarry Smith PetscFunctionBegin; 19765ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19775ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19785ef9f2a5SBarry Smith PetscFunctionReturn(0); 19795ef9f2a5SBarry Smith } 1980e18c124aSSatish Balay EXTERN_C_END 198179bdfe76SSatish Balay 1982273d9f13SBarry Smith EXTERN_C_BEGIN 1983d94109b8SHong Zhang extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 1984d94109b8SHong Zhang EXTERN_C_END 1985d94109b8SHong Zhang 1986d94109b8SHong Zhang EXTERN_C_BEGIN 19874a2ae208SSatish Balay #undef __FUNCT__ 1988a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 1989a23d5eceSKris Buschelman int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1990a23d5eceSKris Buschelman { 1991a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 1992a23d5eceSKris Buschelman int ierr,i; 1993a23d5eceSKris Buschelman 1994a23d5eceSKris Buschelman PetscFunctionBegin; 1995a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 1996a23d5eceSKris Buschelman ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1997a23d5eceSKris Buschelman 1998a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1999a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2000a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2001a23d5eceSKris Buschelman if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2002a23d5eceSKris Buschelman if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2003a23d5eceSKris Buschelman if (d_nnz) { 2004a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 2005a23d5eceSKris Buschelman if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2006a23d5eceSKris Buschelman } 2007a23d5eceSKris Buschelman } 2008a23d5eceSKris Buschelman if (o_nnz) { 2009a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 2010a23d5eceSKris Buschelman if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2011a23d5eceSKris Buschelman } 2012a23d5eceSKris Buschelman } 2013a23d5eceSKris Buschelman 2014a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2015a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2016a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2017a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2018a23d5eceSKris Buschelman 2019a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2020a23d5eceSKris Buschelman b->bs = bs; 2021a23d5eceSKris Buschelman b->bs2 = bs*bs; 2022a23d5eceSKris Buschelman b->mbs = B->m/bs; 2023a23d5eceSKris Buschelman b->nbs = B->n/bs; 2024a23d5eceSKris Buschelman b->Mbs = B->M/bs; 2025a23d5eceSKris Buschelman b->Nbs = B->N/bs; 2026a23d5eceSKris Buschelman 2027a23d5eceSKris Buschelman ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2028a23d5eceSKris Buschelman b->rowners[0] = 0; 2029a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2030a23d5eceSKris Buschelman b->rowners[i] += b->rowners[i-1]; 2031a23d5eceSKris Buschelman } 2032a23d5eceSKris Buschelman b->rstart = b->rowners[b->rank]; 2033a23d5eceSKris Buschelman b->rend = b->rowners[b->rank+1]; 2034a23d5eceSKris Buschelman 2035a23d5eceSKris Buschelman ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2036a23d5eceSKris Buschelman b->cowners[0] = 0; 2037a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2038a23d5eceSKris Buschelman b->cowners[i] += b->cowners[i-1]; 2039a23d5eceSKris Buschelman } 2040a23d5eceSKris Buschelman b->cstart = b->cowners[b->rank]; 2041a23d5eceSKris Buschelman b->cend = b->cowners[b->rank+1]; 2042a23d5eceSKris Buschelman 2043a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2044a23d5eceSKris Buschelman b->rowners_bs[i] = b->rowners[i]*bs; 2045a23d5eceSKris Buschelman } 2046a23d5eceSKris Buschelman b->rstart_bs = b->rstart*bs; 2047a23d5eceSKris Buschelman b->rend_bs = b->rend*bs; 2048a23d5eceSKris Buschelman b->cstart_bs = b->cstart*bs; 2049a23d5eceSKris Buschelman b->cend_bs = b->cend*bs; 2050a23d5eceSKris Buschelman 20519c097c71SKris Buschelman ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 20529c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2053c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 20549c097c71SKris Buschelman PetscLogObjectParent(B,b->A); 20559c097c71SKris Buschelman ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 20569c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2057c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 20589c097c71SKris Buschelman PetscLogObjectParent(B,b->B); 2059c60e587dSKris Buschelman 2060a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2061a23d5eceSKris Buschelman 2062a23d5eceSKris Buschelman PetscFunctionReturn(0); 2063a23d5eceSKris Buschelman } 2064a23d5eceSKris Buschelman EXTERN_C_END 2065a23d5eceSKris Buschelman 2066a23d5eceSKris Buschelman EXTERN_C_BEGIN 206792b32695SKris Buschelman EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 20685bf65638SKris Buschelman EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 206992b32695SKris Buschelman EXTERN_C_END 20705bf65638SKris Buschelman 20710bad9183SKris Buschelman /*MC 2072fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 20730bad9183SKris Buschelman 20740bad9183SKris Buschelman Options Database Keys: 20750bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 20760bad9183SKris Buschelman 20770bad9183SKris Buschelman Level: beginner 20780bad9183SKris Buschelman 20790bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 20800bad9183SKris Buschelman M*/ 20810bad9183SKris Buschelman 208292b32695SKris Buschelman EXTERN_C_BEGIN 2083a23d5eceSKris Buschelman #undef __FUNCT__ 20844a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2085273d9f13SBarry Smith int MatCreate_MPIBAIJ(Mat B) 2086273d9f13SBarry Smith { 2087273d9f13SBarry Smith Mat_MPIBAIJ *b; 2088cfce73b9SSatish Balay int ierr; 2089273d9f13SBarry Smith PetscTruth flg; 2090273d9f13SBarry Smith 2091273d9f13SBarry Smith PetscFunctionBegin; 209282502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 209382502324SSatish Balay B->data = (void*)b; 209482502324SSatish Balay 2095273d9f13SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2096273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2097273d9f13SBarry Smith B->mapping = 0; 2098273d9f13SBarry Smith B->factor = 0; 2099273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2100273d9f13SBarry Smith 2101273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2102273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2103273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2104273d9f13SBarry Smith 2105273d9f13SBarry Smith /* build local table of row and column ownerships */ 210682502324SSatish Balay ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2107b0a32e0cSBarry Smith PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2108273d9f13SBarry Smith b->cowners = b->rowners + b->size + 2; 2109273d9f13SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2110273d9f13SBarry Smith 2111273d9f13SBarry Smith /* build cache for off array entries formed */ 2112273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2113273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2114273d9f13SBarry Smith b->colmap = PETSC_NULL; 2115273d9f13SBarry Smith b->garray = PETSC_NULL; 2116273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2117273d9f13SBarry Smith 2118cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2119273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 212064a35ccbSBarry Smith b->setvalueslen = 0; 2121273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2122273d9f13SBarry Smith #endif 2123273d9f13SBarry Smith 2124273d9f13SBarry Smith /* stuff used in block assembly */ 2125273d9f13SBarry Smith b->barray = 0; 2126273d9f13SBarry Smith 2127273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2128273d9f13SBarry Smith b->lvec = 0; 2129273d9f13SBarry Smith b->Mvctx = 0; 2130273d9f13SBarry Smith 2131273d9f13SBarry Smith /* stuff for MatGetRow() */ 2132273d9f13SBarry Smith b->rowindices = 0; 2133273d9f13SBarry Smith b->rowvalues = 0; 2134273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2135273d9f13SBarry Smith 2136273d9f13SBarry Smith /* hash table stuff */ 2137273d9f13SBarry Smith b->ht = 0; 2138273d9f13SBarry Smith b->hd = 0; 2139273d9f13SBarry Smith b->ht_size = 0; 2140273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2141273d9f13SBarry Smith b->ht_fact = 0; 2142273d9f13SBarry Smith b->ht_total_ct = 0; 2143273d9f13SBarry Smith b->ht_insert_ct = 0; 2144273d9f13SBarry Smith 2145b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2146273d9f13SBarry Smith if (flg) { 2147f6275e2eSBarry Smith PetscReal fact = 1.39; 2148273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 214987828ca2SBarry Smith ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2150273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2151273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2152b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2153273d9f13SBarry Smith } 2154273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2155273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2156273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2157273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2158273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2159273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2160273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2161273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2162273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2163a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2164a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2165a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 216692b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 216792b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 216892b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 21695bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 21705bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 21715bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2172273d9f13SBarry Smith PetscFunctionReturn(0); 2173273d9f13SBarry Smith } 2174273d9f13SBarry Smith EXTERN_C_END 2175273d9f13SBarry Smith 2176209238afSKris Buschelman /*MC 2177002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2178209238afSKris Buschelman 2179209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2180209238afSKris Buschelman and MATMPIBAIJ otherwise. 2181209238afSKris Buschelman 2182209238afSKris Buschelman Options Database Keys: 2183209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2184209238afSKris Buschelman 2185209238afSKris Buschelman Level: beginner 2186209238afSKris Buschelman 2187209238afSKris Buschelman .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ 2188209238afSKris Buschelman M*/ 2189209238afSKris Buschelman 2190209238afSKris Buschelman EXTERN_C_BEGIN 2191209238afSKris Buschelman #undef __FUNCT__ 2192209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ" 2193209238afSKris Buschelman int MatCreate_BAIJ(Mat A) { 2194209238afSKris Buschelman int ierr,size; 2195209238afSKris Buschelman 2196209238afSKris Buschelman PetscFunctionBegin; 2197209238afSKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2198209238afSKris Buschelman ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2199209238afSKris Buschelman if (size == 1) { 2200209238afSKris Buschelman ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2201209238afSKris Buschelman } else { 2202209238afSKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2203209238afSKris Buschelman } 2204209238afSKris Buschelman PetscFunctionReturn(0); 2205209238afSKris Buschelman } 2206209238afSKris Buschelman EXTERN_C_END 2207209238afSKris Buschelman 22084a2ae208SSatish Balay #undef __FUNCT__ 22094a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2210273d9f13SBarry Smith /*@C 2211273d9f13SBarry Smith MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2212273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2213273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2214273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2215273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2216273d9f13SBarry Smith 2217273d9f13SBarry Smith Collective on Mat 2218273d9f13SBarry Smith 2219273d9f13SBarry Smith Input Parameters: 2220273d9f13SBarry Smith + A - the matrix 2221273d9f13SBarry Smith . bs - size of blockk 2222273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2223273d9f13SBarry Smith submatrix (same for all local rows) 2224273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2225273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2226273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2227273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2228273d9f13SBarry Smith submatrix (same for all local rows). 2229273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2230273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2231273d9f13SBarry Smith each block row) or PETSC_NULL. 2232273d9f13SBarry Smith 2233273d9f13SBarry Smith Output Parameter: 2234273d9f13SBarry Smith 2235273d9f13SBarry Smith 2236273d9f13SBarry Smith Options Database Keys: 2237273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2238273d9f13SBarry Smith block calculations (much slower) 2239273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2240273d9f13SBarry Smith 2241273d9f13SBarry Smith Notes: 2242273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2243273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2244273d9f13SBarry Smith 2245273d9f13SBarry Smith Storage Information: 2246273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2247273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2248273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2249273d9f13SBarry Smith local matrix (a rectangular submatrix). 2250273d9f13SBarry Smith 2251273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2252273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2253273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2254273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2255273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2256273d9f13SBarry Smith 2257273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2258273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2259273d9f13SBarry Smith 2260273d9f13SBarry Smith .vb 2261273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2262273d9f13SBarry Smith ------------------- 2263273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2264273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2265273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2266273d9f13SBarry Smith ------------------- 2267273d9f13SBarry Smith .ve 2268273d9f13SBarry Smith 2269273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2270273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2271273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2272273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2273273d9f13SBarry Smith 2274273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2275273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2276273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2277273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2278273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2279273d9f13SBarry Smith matrices. 2280273d9f13SBarry Smith 2281273d9f13SBarry Smith Level: intermediate 2282273d9f13SBarry Smith 2283273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2284273d9f13SBarry Smith 2285273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2286273d9f13SBarry Smith @*/ 2287ca01db9bSBarry Smith int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2288273d9f13SBarry Smith { 2289ca01db9bSBarry Smith int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2290273d9f13SBarry Smith 2291273d9f13SBarry Smith PetscFunctionBegin; 2292a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2293a23d5eceSKris Buschelman if (f) { 2294a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2295273d9f13SBarry Smith } 2296273d9f13SBarry Smith PetscFunctionReturn(0); 2297273d9f13SBarry Smith } 2298273d9f13SBarry Smith 22994a2ae208SSatish Balay #undef __FUNCT__ 23004a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 230179bdfe76SSatish Balay /*@C 230279bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 230379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 230479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 230579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 230679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 230779bdfe76SSatish Balay 2308db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2309db81eaa0SLois Curfman McInnes 231079bdfe76SSatish Balay Input Parameters: 2311db81eaa0SLois Curfman McInnes + comm - MPI communicator 231279bdfe76SSatish Balay . bs - size of blockk 231379bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 231492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 231592e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 231692e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 231792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 231892e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2319be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2320be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 232147a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 232279bdfe76SSatish Balay submatrix (same for all local rows) 232347a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 232492e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2325db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 232647a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 232779bdfe76SSatish Balay submatrix (same for all local rows). 232847a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 232992e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 233092e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 233179bdfe76SSatish Balay 233279bdfe76SSatish Balay Output Parameter: 233379bdfe76SSatish Balay . A - the matrix 233479bdfe76SSatish Balay 2335db81eaa0SLois Curfman McInnes Options Database Keys: 2336db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2337db81eaa0SLois Curfman McInnes block calculations (much slower) 2338db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 23393ffaccefSLois Curfman McInnes 2340b259b22eSLois Curfman McInnes Notes: 234147a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 234247a75d0bSBarry Smith 234379bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 234479bdfe76SSatish Balay (possibly both). 234579bdfe76SSatish Balay 2346be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2347be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2348be79a94dSBarry Smith 234979bdfe76SSatish Balay Storage Information: 235079bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 235179bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 235279bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 235379bdfe76SSatish Balay local matrix (a rectangular submatrix). 235479bdfe76SSatish Balay 235579bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 235679bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 235779bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 235879bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 235979bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 236079bdfe76SSatish Balay 236179bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 236279bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 236379bdfe76SSatish Balay 2364db81eaa0SLois Curfman McInnes .vb 2365db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2366db81eaa0SLois Curfman McInnes ------------------- 2367db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2368db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2369db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2370db81eaa0SLois Curfman McInnes ------------------- 2371db81eaa0SLois Curfman McInnes .ve 237279bdfe76SSatish Balay 237379bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 237479bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 237579bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 237657b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 237779bdfe76SSatish Balay 2378d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2379d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 238079bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 238192e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 238292e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 23836da5968aSLois Curfman McInnes matrices. 238479bdfe76SSatish Balay 2385027ccd11SLois Curfman McInnes Level: intermediate 2386027ccd11SLois Curfman McInnes 238792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 238879bdfe76SSatish Balay 23893eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 239079bdfe76SSatish Balay @*/ 2391ca01db9bSBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 239279bdfe76SSatish Balay { 2393273d9f13SBarry Smith int ierr,size; 239479bdfe76SSatish Balay 2395d64ed03dSBarry Smith PetscFunctionBegin; 2396273d9f13SBarry Smith ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2397d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2398273d9f13SBarry Smith if (size > 1) { 2399273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2400273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2401273d9f13SBarry Smith } else { 2402273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2403273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 24043914022bSBarry Smith } 24053a40ed3dSBarry Smith PetscFunctionReturn(0); 240679bdfe76SSatish Balay } 2407026e39d0SSatish Balay 24084a2ae208SSatish Balay #undef __FUNCT__ 24094a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 24102e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 24110ac07820SSatish Balay { 24120ac07820SSatish Balay Mat mat; 24130ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2414f1af5d2fSBarry Smith int ierr,len=0; 24150ac07820SSatish Balay 2416d64ed03dSBarry Smith PetscFunctionBegin; 24170ac07820SSatish Balay *newmat = 0; 2418273d9f13SBarry Smith ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2419be5d1d56SKris Buschelman ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 24207fff6886SHong Zhang 24214beb1cfeSHong Zhang ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 24224beb1cfeSHong Zhang mat->factor = matin->factor; 2423273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 24240ac07820SSatish Balay mat->assembled = PETSC_TRUE; 24257fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 24267fff6886SHong Zhang 2427273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 24280ac07820SSatish Balay a->bs = oldmat->bs; 24290ac07820SSatish Balay a->bs2 = oldmat->bs2; 24300ac07820SSatish Balay a->mbs = oldmat->mbs; 24310ac07820SSatish Balay a->nbs = oldmat->nbs; 24320ac07820SSatish Balay a->Mbs = oldmat->Mbs; 24330ac07820SSatish Balay a->Nbs = oldmat->Nbs; 24340ac07820SSatish Balay 24350ac07820SSatish Balay a->rstart = oldmat->rstart; 24360ac07820SSatish Balay a->rend = oldmat->rend; 24370ac07820SSatish Balay a->cstart = oldmat->cstart; 24380ac07820SSatish Balay a->cend = oldmat->cend; 24390ac07820SSatish Balay a->size = oldmat->size; 24400ac07820SSatish Balay a->rank = oldmat->rank; 2441aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2442aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2443aef5e8e0SSatish Balay a->rowindices = 0; 24440ac07820SSatish Balay a->rowvalues = 0; 24450ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 244630793edcSSatish Balay a->barray = 0; 24473123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 24483123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 24493123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 24503123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 24510ac07820SSatish Balay 2452133cdb44SSatish Balay /* hash table stuff */ 2453133cdb44SSatish Balay a->ht = 0; 2454133cdb44SSatish Balay a->hd = 0; 2455133cdb44SSatish Balay a->ht_size = 0; 2456133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 245725fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2458133cdb44SSatish Balay a->ht_total_ct = 0; 2459133cdb44SSatish Balay a->ht_insert_ct = 0; 2460133cdb44SSatish Balay 2461549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 24628798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 24638798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 24640ac07820SSatish Balay if (oldmat->colmap) { 2465aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 24660f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 246748e59246SSatish Balay #else 246882502324SSatish Balay ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2469b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2470549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 247148e59246SSatish Balay #endif 24720ac07820SSatish Balay } else a->colmap = 0; 24734beb1cfeSHong Zhang 24740ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 247582502324SSatish Balay ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2476b0a32e0cSBarry Smith PetscLogObjectMemory(mat,len*sizeof(int)); 2477549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 24780ac07820SSatish Balay } else a->garray = 0; 24790ac07820SSatish Balay 24800ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2481b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->lvec); 24820ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2483b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->Mvctx); 24847fff6886SHong Zhang 24852e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2486b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->A); 24872e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2488b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->B); 2489b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 24900ac07820SSatish Balay *newmat = mat; 24914beb1cfeSHong Zhang 24923a40ed3dSBarry Smith PetscFunctionReturn(0); 24930ac07820SSatish Balay } 249457b952d6SSatish Balay 2495e090d566SSatish Balay #include "petscsys.h" 249657b952d6SSatish Balay 24974a2ae208SSatish Balay #undef __FUNCT__ 24984a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 24998e9aea5cSBarry Smith int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 250057b952d6SSatish Balay { 250157b952d6SSatish Balay Mat A; 250257b952d6SSatish Balay int i,nz,ierr,j,rstart,rend,fd; 250387828ca2SBarry Smith PetscScalar *vals,*buf; 250457b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 250557b952d6SSatish Balay MPI_Status status; 2506cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 250757b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2508f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 250957b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 251057b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 251157b952d6SSatish Balay 2512d64ed03dSBarry Smith PetscFunctionBegin; 2513b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 251457b952d6SSatish Balay 2515d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2516d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 251757b952d6SSatish Balay if (!rank) { 2518b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2519e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2520552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2521d64ed03dSBarry Smith if (header[3] < 0) { 252229bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2523d64ed03dSBarry Smith } 25246c5fab8fSBarry Smith } 2525d64ed03dSBarry Smith 2526ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 252757b952d6SSatish Balay M = header[1]; N = header[2]; 252857b952d6SSatish Balay 252929bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 253057b952d6SSatish Balay 253157b952d6SSatish Balay /* 253257b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 253357b952d6SSatish Balay divisible by the blocksize 253457b952d6SSatish Balay */ 253557b952d6SSatish Balay Mbs = M/bs; 253657b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 253757b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 253857b952d6SSatish Balay else Mbs++; 253957b952d6SSatish Balay if (extra_rows &&!rank) { 2540b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 254157b952d6SSatish Balay } 2542537820f0SBarry Smith 254357b952d6SSatish Balay /* determine ownership of all rows */ 254457b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 254557b952d6SSatish Balay m = mbs*bs; 2546b0a32e0cSBarry Smith ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2547cee3aa6bSSatish Balay browners = rowners + size + 1; 2548ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 254957b952d6SSatish Balay rowners[0] = 0; 2550cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2551cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 255257b952d6SSatish Balay rstart = rowners[rank]; 255357b952d6SSatish Balay rend = rowners[rank+1]; 255457b952d6SSatish Balay 255557b952d6SSatish Balay /* distribute row lengths to all processors */ 255682502324SSatish Balay ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 255757b952d6SSatish Balay if (!rank) { 2558b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2559e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 256057b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 256182502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2562cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2563ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2564606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2565d64ed03dSBarry Smith } else { 2566ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 256757b952d6SSatish Balay } 256857b952d6SSatish Balay 256957b952d6SSatish Balay if (!rank) { 257057b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 257182502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2572549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 257357b952d6SSatish Balay for (i=0; i<size; i++) { 257457b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 257557b952d6SSatish Balay procsnz[i] += rowlengths[j]; 257657b952d6SSatish Balay } 257757b952d6SSatish Balay } 2578606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 257957b952d6SSatish Balay 258057b952d6SSatish Balay /* determine max buffer needed and allocate it */ 258157b952d6SSatish Balay maxnz = 0; 258257b952d6SSatish Balay for (i=0; i<size; i++) { 258357b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 258457b952d6SSatish Balay } 258582502324SSatish Balay ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 258657b952d6SSatish Balay 258757b952d6SSatish Balay /* read in my part of the matrix column indices */ 258857b952d6SSatish Balay nz = procsnz[0]; 258982502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 259057b952d6SSatish Balay mycols = ibuf; 2591cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2592e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2593cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2594cee3aa6bSSatish Balay 259557b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 259657b952d6SSatish Balay for (i=1; i<size-1; i++) { 259757b952d6SSatish Balay nz = procsnz[i]; 2598e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2599ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 260057b952d6SSatish Balay } 260157b952d6SSatish Balay /* read in the stuff for the last proc */ 260257b952d6SSatish Balay if (size != 1) { 260357b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2604e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 260557b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2606ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 260757b952d6SSatish Balay } 2608606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2609d64ed03dSBarry Smith } else { 261057b952d6SSatish Balay /* determine buffer space needed for message */ 261157b952d6SSatish Balay nz = 0; 261257b952d6SSatish Balay for (i=0; i<m; i++) { 261357b952d6SSatish Balay nz += locrowlens[i]; 261457b952d6SSatish Balay } 261582502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 261657b952d6SSatish Balay mycols = ibuf; 261757b952d6SSatish Balay /* receive message of column indices*/ 2618ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2619ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 262029bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 262157b952d6SSatish Balay } 262257b952d6SSatish Balay 262357b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 262482502324SSatish Balay ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2625cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 262682502324SSatish Balay ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2627549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 262857b952d6SSatish Balay masked1 = mask + Mbs; 262957b952d6SSatish Balay masked2 = masked1 + Mbs; 263057b952d6SSatish Balay rowcount = 0; nzcount = 0; 263157b952d6SSatish Balay for (i=0; i<mbs; i++) { 263257b952d6SSatish Balay dcount = 0; 263357b952d6SSatish Balay odcount = 0; 263457b952d6SSatish Balay for (j=0; j<bs; j++) { 263557b952d6SSatish Balay kmax = locrowlens[rowcount]; 263657b952d6SSatish Balay for (k=0; k<kmax; k++) { 263757b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 263857b952d6SSatish Balay if (!mask[tmp]) { 263957b952d6SSatish Balay mask[tmp] = 1; 264057b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 264157b952d6SSatish Balay else masked1[dcount++] = tmp; 264257b952d6SSatish Balay } 264357b952d6SSatish Balay } 264457b952d6SSatish Balay rowcount++; 264557b952d6SSatish Balay } 2646cee3aa6bSSatish Balay 264757b952d6SSatish Balay dlens[i] = dcount; 264857b952d6SSatish Balay odlens[i] = odcount; 2649cee3aa6bSSatish Balay 265057b952d6SSatish Balay /* zero out the mask elements we set */ 265157b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 265257b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 265357b952d6SSatish Balay } 2654cee3aa6bSSatish Balay 265557b952d6SSatish Balay /* create our matrix */ 265678ae41b4SKris Buschelman ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 265778ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 265878ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 265978ae41b4SKris Buschelman 266078ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 26616d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 266257b952d6SSatish Balay 266357b952d6SSatish Balay if (!rank) { 266487828ca2SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 266557b952d6SSatish Balay /* read in my part of the matrix numerical values */ 266657b952d6SSatish Balay nz = procsnz[0]; 266757b952d6SSatish Balay vals = buf; 2668cee3aa6bSSatish Balay mycols = ibuf; 2669cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2670e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2671cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2672537820f0SBarry Smith 267357b952d6SSatish Balay /* insert into matrix */ 267457b952d6SSatish Balay jj = rstart*bs; 267557b952d6SSatish Balay for (i=0; i<m; i++) { 2676b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 267757b952d6SSatish Balay mycols += locrowlens[i]; 267857b952d6SSatish Balay vals += locrowlens[i]; 267957b952d6SSatish Balay jj++; 268057b952d6SSatish Balay } 268157b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 268257b952d6SSatish Balay for (i=1; i<size-1; i++) { 268357b952d6SSatish Balay nz = procsnz[i]; 268457b952d6SSatish Balay vals = buf; 2685e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2686ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 268757b952d6SSatish Balay } 268857b952d6SSatish Balay /* the last proc */ 268957b952d6SSatish Balay if (size != 1){ 269057b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2691cee3aa6bSSatish Balay vals = buf; 2692e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 269357b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2694ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 269557b952d6SSatish Balay } 2696606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2697d64ed03dSBarry Smith } else { 269857b952d6SSatish Balay /* receive numeric values */ 269987828ca2SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 270057b952d6SSatish Balay 270157b952d6SSatish Balay /* receive message of values*/ 270257b952d6SSatish Balay vals = buf; 2703cee3aa6bSSatish Balay mycols = ibuf; 2704ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2705ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 270629bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 270757b952d6SSatish Balay 270857b952d6SSatish Balay /* insert into matrix */ 270957b952d6SSatish Balay jj = rstart*bs; 2710cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2711b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 271257b952d6SSatish Balay mycols += locrowlens[i]; 271357b952d6SSatish Balay vals += locrowlens[i]; 271457b952d6SSatish Balay jj++; 271557b952d6SSatish Balay } 271657b952d6SSatish Balay } 2717606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2718606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2719606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2720606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2721606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2722606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 27236d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27246d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272578ae41b4SKris Buschelman 272678ae41b4SKris Buschelman *newmat = A; 27273a40ed3dSBarry Smith PetscFunctionReturn(0); 272857b952d6SSatish Balay } 272957b952d6SSatish Balay 27304a2ae208SSatish Balay #undef __FUNCT__ 27314a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2732133cdb44SSatish Balay /*@ 2733133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2734133cdb44SSatish Balay 2735133cdb44SSatish Balay Input Parameters: 2736133cdb44SSatish Balay . mat - the matrix 2737133cdb44SSatish Balay . fact - factor 2738133cdb44SSatish Balay 2739fee21e36SBarry Smith Collective on Mat 2740fee21e36SBarry Smith 27418c890885SBarry Smith Level: advanced 27428c890885SBarry Smith 2743133cdb44SSatish Balay Notes: 2744133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2745133cdb44SSatish Balay 2746133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2747133cdb44SSatish Balay 2748133cdb44SSatish Balay .seealso: MatSetOption() 2749133cdb44SSatish Balay @*/ 2750329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2751133cdb44SSatish Balay { 27525bf65638SKris Buschelman int ierr,(*f)(Mat,PetscReal); 27535bf65638SKris Buschelman 27545bf65638SKris Buschelman PetscFunctionBegin; 27555bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 27565bf65638SKris Buschelman if (f) { 27575bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 27585bf65638SKris Buschelman } 27595bf65638SKris Buschelman PetscFunctionReturn(0); 27605bf65638SKris Buschelman } 27615bf65638SKris Buschelman 27625bf65638SKris Buschelman #undef __FUNCT__ 27635bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 27645bf65638SKris Buschelman int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 27655bf65638SKris Buschelman { 276625fdafccSSatish Balay Mat_MPIBAIJ *baij; 2767133cdb44SSatish Balay 2768133cdb44SSatish Balay PetscFunctionBegin; 2769133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 2770133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2771133cdb44SSatish Balay baij->ht_fact = fact; 2772133cdb44SSatish Balay PetscFunctionReturn(0); 2773133cdb44SSatish Balay } 2774f2a5309cSSatish Balay 27754a2ae208SSatish Balay #undef __FUNCT__ 27764a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2777268466fbSBarry Smith int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2778f2a5309cSSatish Balay { 2779f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2780f2a5309cSSatish Balay PetscFunctionBegin; 2781f2a5309cSSatish Balay *Ad = a->A; 2782f2a5309cSSatish Balay *Ao = a->B; 2783195d93cdSBarry Smith *colmap = a->garray; 2784f2a5309cSSatish Balay PetscFunctionReturn(0); 2785f2a5309cSSatish Balay } 2786