1bba1ac68SSatish Balay /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/ 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 579bdfe76SSatish Balay 6ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat); 8268466fbSBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 9268466fbSBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 10f15d580aSBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 11f15d580aSBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 12f15d580aSBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 13268466fbSBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 14268466fbSBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 15ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16268466fbSBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 1793fea6afSBarry Smith 1893fea6afSBarry Smith /* UGLY, ugly, ugly 1987828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 2093fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2193fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2293fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2393fea6afSBarry Smith into the single precision data structures. 2493fea6afSBarry Smith */ 2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2634232ad1SSatish Balay EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2734232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2834232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 2934232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 3034232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 3193fea6afSBarry Smith #else 326fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 356fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 366fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3793fea6afSBarry Smith #endif 383b2fbd54SBarry Smith 394a2ae208SSatish Balay #undef __FUNCT__ 404a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 417843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 427843d17aSBarry Smith { 437843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 447843d17aSBarry Smith int ierr,i; 4587828ca2SBarry Smith PetscScalar *va,*vb; 467843d17aSBarry Smith Vec vtmp; 477843d17aSBarry Smith 487843d17aSBarry Smith PetscFunctionBegin; 497843d17aSBarry Smith 507843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51273d9f13SBarry Smith ierr = VecGetArray(v,&va);CHKERRQ(ierr); 527843d17aSBarry Smith 53ac355199SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 547843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55273d9f13SBarry Smith ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 567843d17aSBarry Smith 57273d9f13SBarry Smith for (i=0; i<A->m; i++){ 58273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 597843d17aSBarry Smith } 607843d17aSBarry Smith 617843d17aSBarry Smith ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 627843d17aSBarry Smith ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 647843d17aSBarry Smith 657843d17aSBarry Smith PetscFunctionReturn(0); 667843d17aSBarry Smith } 677843d17aSBarry Smith 687fc3c18eSBarry Smith EXTERN_C_BEGIN 694a2ae208SSatish Balay #undef __FUNCT__ 704a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 717fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat) 727fc3c18eSBarry Smith { 737fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 747fc3c18eSBarry Smith int ierr; 757fc3c18eSBarry Smith 767fc3c18eSBarry Smith PetscFunctionBegin; 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 787fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 797fc3c18eSBarry Smith PetscFunctionReturn(0); 807fc3c18eSBarry Smith } 817fc3c18eSBarry Smith EXTERN_C_END 827fc3c18eSBarry Smith 837fc3c18eSBarry Smith EXTERN_C_BEGIN 844a2ae208SSatish Balay #undef __FUNCT__ 854a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 867fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat) 877fc3c18eSBarry Smith { 887fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 897fc3c18eSBarry Smith int ierr; 907fc3c18eSBarry Smith 917fc3c18eSBarry Smith PetscFunctionBegin; 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 937fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 947fc3c18eSBarry Smith PetscFunctionReturn(0); 957fc3c18eSBarry Smith } 967fc3c18eSBarry Smith EXTERN_C_END 977fc3c18eSBarry Smith 98537820f0SBarry Smith /* 99537820f0SBarry Smith Local utility routine that creates a mapping from the global column 10057b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10157b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10257b952d6SSatish Balay length of colmap equals the global matrix length. 10357b952d6SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 10657b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 10757b952d6SSatish Balay { 10857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10957b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110dc2900e9SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 11157b952d6SSatish Balay 112d64ed03dSBarry Smith PetscFunctionBegin; 113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 114f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11548e59246SSatish Balay for (i=0; i<nbs; i++){ 1160f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11748e59246SSatish Balay } 11848e59246SSatish Balay #else 11982502324SSatish Balay ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 120b0a32e0cSBarry Smith PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12348e59246SSatish Balay #endif 1243a40ed3dSBarry Smith PetscFunctionReturn(0); 12557b952d6SSatish Balay } 12657b952d6SSatish Balay 12780c1aa95SSatish Balay #define CHUNKSIZE 10 12880c1aa95SSatish Balay 129f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13080c1aa95SSatish Balay { \ 13180c1aa95SSatish Balay \ 13280c1aa95SSatish Balay brow = row/bs; \ 13380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13580c1aa95SSatish Balay bcol = col/bs; \ 13680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 137ab26458aSBarry Smith low = 0; high = nrow; \ 138ab26458aSBarry Smith while (high-low > 3) { \ 139ab26458aSBarry Smith t = (low+high)/2; \ 140ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 141ab26458aSBarry Smith else low = t; \ 142ab26458aSBarry Smith } \ 143ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 147eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 148eada6651SSatish Balay else *bap = value; \ 149ac7a638eSSatish Balay goto a_noinsert; \ 15080c1aa95SSatish Balay } \ 15180c1aa95SSatish Balay } \ 15289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 15329bbc08cSBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 15480c1aa95SSatish Balay if (nrow >= rmax) { \ 15580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1573eda8832SBarry Smith MatScalar *new_a; \ 15880c1aa95SSatish Balay \ 15929bbc08cSBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 16089280ab3SLois Curfman McInnes \ 16180c1aa95SSatish Balay /* malloc new storage space */ \ 1623eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 16382502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 16480c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 16580c1aa95SSatish Balay new_i = new_j + new_nz; \ 16680c1aa95SSatish Balay \ 16780c1aa95SSatish Balay /* copy over old data into new slots */ \ 16880c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 16980c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 17180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1723eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1733eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 17487828ca2SBarry Smith ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1763eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 17780c1aa95SSatish Balay /* free up old matrix storage */ \ 178606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179606d414cSSatish Balay if (!a->singlemalloc) { \ 180606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182606d414cSSatish Balay } \ 18380c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1847c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 18580c1aa95SSatish Balay \ 18680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 18980c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 19080c1aa95SSatish Balay a->reallocs++; \ 19180c1aa95SSatish Balay a->nz++; \ 19280c1aa95SSatish Balay } \ 19380c1aa95SSatish Balay N = nrow++ - 1; \ 19480c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 19580c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 19680c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1973eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 19880c1aa95SSatish Balay } \ 1993eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 20080c1aa95SSatish Balay rp[_i] = bcol; \ 20180c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 202ac7a638eSSatish Balay a_noinsert:; \ 20380c1aa95SSatish Balay ailen[brow] = nrow; \ 20480c1aa95SSatish Balay } 20557b952d6SSatish Balay 206ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207ac7a638eSSatish Balay { \ 208ac7a638eSSatish Balay brow = row/bs; \ 209ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 211ac7a638eSSatish Balay bcol = col/bs; \ 212ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 213ac7a638eSSatish Balay low = 0; high = nrow; \ 214ac7a638eSSatish Balay while (high-low > 3) { \ 215ac7a638eSSatish Balay t = (low+high)/2; \ 216ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 217ac7a638eSSatish Balay else low = t; \ 218ac7a638eSSatish Balay } \ 219ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 220ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 221ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 222ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 223ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 224ac7a638eSSatish Balay else *bap = value; \ 225ac7a638eSSatish Balay goto b_noinsert; \ 226ac7a638eSSatish Balay } \ 227ac7a638eSSatish Balay } \ 22889280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 22929bbc08cSBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 230ac7a638eSSatish Balay if (nrow >= rmax) { \ 231ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 23274c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2333eda8832SBarry Smith MatScalar *new_a; \ 234ac7a638eSSatish Balay \ 23529bbc08cSBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 23689280ab3SLois Curfman McInnes \ 237ac7a638eSSatish Balay /* malloc new storage space */ \ 2383eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 23982502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 241ac7a638eSSatish Balay new_i = new_j + new_nz; \ 242ac7a638eSSatish Balay \ 243ac7a638eSSatish Balay /* copy over old data into new slots */ \ 244ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 24574c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2483eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2493eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2503eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2523eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253ac7a638eSSatish Balay /* free up old matrix storage */ \ 254606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255606d414cSSatish Balay if (!b->singlemalloc) { \ 256606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258606d414cSSatish Balay } \ 25974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2607c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 261ac7a638eSSatish Balay \ 262ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264b0a32e0cSBarry Smith PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 26574c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 26674c639caSSatish Balay b->reallocs++; \ 26774c639caSSatish Balay b->nz++; \ 268ac7a638eSSatish Balay } \ 269ac7a638eSSatish Balay N = nrow++ - 1; \ 270ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 271ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 272ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2733eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274ac7a638eSSatish Balay } \ 2753eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276ac7a638eSSatish Balay rp[_i] = bcol; \ 277ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 278ac7a638eSSatish Balay b_noinsert:; \ 279ac7a638eSSatish Balay bilen[brow] = nrow; \ 280ac7a638eSSatish Balay } 281ac7a638eSSatish Balay 28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2834a2ae208SSatish Balay #undef __FUNCT__ 2844a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 285f15d580aSBarry Smith int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 28657b952d6SSatish Balay { 2876fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 28893fea6afSBarry Smith int ierr,i,N = m*n; 2896fa18ffdSBarry Smith MatScalar *vsingle; 29093fea6afSBarry Smith 29193fea6afSBarry Smith PetscFunctionBegin; 2926fa18ffdSBarry Smith if (N > b->setvalueslen) { 2936fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 29482502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2956fa18ffdSBarry Smith b->setvalueslen = N; 2966fa18ffdSBarry Smith } 2976fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2986fa18ffdSBarry Smith 29993fea6afSBarry Smith for (i=0; i<N; i++) { 30093fea6afSBarry Smith vsingle[i] = v[i]; 30193fea6afSBarry Smith } 30293fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 30393fea6afSBarry Smith PetscFunctionReturn(0); 30493fea6afSBarry Smith } 30593fea6afSBarry Smith 3064a2ae208SSatish Balay #undef __FUNCT__ 3074a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 308f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 30993fea6afSBarry Smith { 3106fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3116fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 3126fa18ffdSBarry Smith MatScalar *vsingle; 31393fea6afSBarry Smith 31493fea6afSBarry Smith PetscFunctionBegin; 3156fa18ffdSBarry Smith if (N > b->setvalueslen) { 3166fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 31782502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3186fa18ffdSBarry Smith b->setvalueslen = N; 3196fa18ffdSBarry Smith } 3206fa18ffdSBarry Smith vsingle = b->setvaluescopy; 32193fea6afSBarry Smith for (i=0; i<N; i++) { 32293fea6afSBarry Smith vsingle[i] = v[i]; 32393fea6afSBarry Smith } 32493fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 32593fea6afSBarry Smith PetscFunctionReturn(0); 32693fea6afSBarry Smith } 3276fa18ffdSBarry Smith 3284a2ae208SSatish Balay #undef __FUNCT__ 3294a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 330f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 3316fa18ffdSBarry Smith { 3326fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3336fa18ffdSBarry Smith int ierr,i,N = m*n; 3346fa18ffdSBarry Smith MatScalar *vsingle; 3356fa18ffdSBarry Smith 3366fa18ffdSBarry Smith PetscFunctionBegin; 3376fa18ffdSBarry Smith if (N > b->setvalueslen) { 3386fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 33982502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3406fa18ffdSBarry Smith b->setvalueslen = N; 3416fa18ffdSBarry Smith } 3426fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3436fa18ffdSBarry Smith for (i=0; i<N; i++) { 3446fa18ffdSBarry Smith vsingle[i] = v[i]; 3456fa18ffdSBarry Smith } 3466fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3476fa18ffdSBarry Smith PetscFunctionReturn(0); 3486fa18ffdSBarry Smith } 3496fa18ffdSBarry Smith 3504a2ae208SSatish Balay #undef __FUNCT__ 3514a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 352f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 3536fa18ffdSBarry Smith { 3546fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3556fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 3566fa18ffdSBarry Smith MatScalar *vsingle; 3576fa18ffdSBarry Smith 3586fa18ffdSBarry Smith PetscFunctionBegin; 3596fa18ffdSBarry Smith if (N > b->setvalueslen) { 3606fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 36182502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3626fa18ffdSBarry Smith b->setvalueslen = N; 3636fa18ffdSBarry Smith } 3646fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3656fa18ffdSBarry Smith for (i=0; i<N; i++) { 3666fa18ffdSBarry Smith vsingle[i] = v[i]; 3676fa18ffdSBarry Smith } 3686fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3696fa18ffdSBarry Smith PetscFunctionReturn(0); 3706fa18ffdSBarry Smith } 37193fea6afSBarry Smith #endif 37293fea6afSBarry Smith 3734a2ae208SSatish Balay #undef __FUNCT__ 374e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 375f15d580aSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 37693fea6afSBarry Smith { 37757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 37893fea6afSBarry Smith MatScalar value; 379273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 3804fa0d573SSatish Balay int ierr,i,j,row,col; 381273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 3824fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 3834fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 38457b952d6SSatish Balay 385eada6651SSatish Balay /* Some Variables required in the macro */ 38680c1aa95SSatish Balay Mat A = baij->A; 38780c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 388ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3893eda8832SBarry Smith MatScalar *aa=a->a; 390ac7a638eSSatish Balay 391ac7a638eSSatish Balay Mat B = baij->B; 392ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 393ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3943eda8832SBarry Smith MatScalar *ba=b->a; 395ac7a638eSSatish Balay 396ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 397ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 3983eda8832SBarry Smith MatScalar *ap,*bap; 39980c1aa95SSatish Balay 400d64ed03dSBarry Smith PetscFunctionBegin; 40157b952d6SSatish Balay for (i=0; i<m; i++) { 4025ef9f2a5SBarry Smith if (im[i] < 0) continue; 403aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 404590ac198SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 405639f9d9dSBarry Smith #endif 40657b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 40757b952d6SSatish Balay row = im[i] - rstart_orig; 40857b952d6SSatish Balay for (j=0; j<n; j++) { 40957b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 41057b952d6SSatish Balay col = in[j] - cstart_orig; 41157b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 41380c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 41473959e64SBarry Smith } else if (in[j] < 0) continue; 415aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 416590ac198SBarry Smith else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);} 417639f9d9dSBarry Smith #endif 41857b952d6SSatish Balay else { 41957b952d6SSatish Balay if (mat->was_assembled) { 420905e6a2fSBarry Smith if (!baij->colmap) { 421905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 422905e6a2fSBarry Smith } 423aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4240f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 425bba1ac68SSatish Balay col = col - 1; 42648e59246SSatish Balay #else 427bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 42848e59246SSatish Balay #endif 42957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 43057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4318295de27SSatish Balay col = in[j]; 4329bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 4339bf004c3SSatish Balay B = baij->B; 4349bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 4359bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 4369bf004c3SSatish Balay ba=b->a; 437bba1ac68SSatish Balay } else col += in[j]%bs; 4388295de27SSatish Balay } else col = in[j]; 43957b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 44090da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 44190da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 44257b952d6SSatish Balay } 44357b952d6SSatish Balay } 444d64ed03dSBarry Smith } else { 44590f02eecSBarry Smith if (!baij->donotstash) { 446ff2fd236SBarry Smith if (roworiented) { 4476fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 448ff2fd236SBarry Smith } else { 4496fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 45057b952d6SSatish Balay } 45157b952d6SSatish Balay } 45257b952d6SSatish Balay } 45390f02eecSBarry Smith } 4543a40ed3dSBarry Smith PetscFunctionReturn(0); 45557b952d6SSatish Balay } 45657b952d6SSatish Balay 4574a2ae208SSatish Balay #undef __FUNCT__ 4584a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 459f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 460ab26458aSBarry Smith { 461ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 462f15d580aSBarry Smith const MatScalar *value; 463f15d580aSBarry Smith MatScalar *barray=baij->barray; 464273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 465273d9f13SBarry Smith int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 466ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 467ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 468ab26458aSBarry Smith 469b16ae2b1SBarry Smith PetscFunctionBegin; 47030793edcSSatish Balay if(!barray) { 47182502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 47282502324SSatish Balay baij->barray = barray; 47330793edcSSatish Balay } 47430793edcSSatish Balay 475ab26458aSBarry Smith if (roworiented) { 476ab26458aSBarry Smith stepval = (n-1)*bs; 477ab26458aSBarry Smith } else { 478ab26458aSBarry Smith stepval = (m-1)*bs; 479ab26458aSBarry Smith } 480ab26458aSBarry Smith for (i=0; i<m; i++) { 4815ef9f2a5SBarry Smith if (im[i] < 0) continue; 482aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 483590ac198SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 484ab26458aSBarry Smith #endif 485ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 486ab26458aSBarry Smith row = im[i] - rstart; 487ab26458aSBarry Smith for (j=0; j<n; j++) { 48815b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 48915b57d14SSatish Balay if ((roworiented) && (n == 1)) { 490f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 49115b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 492f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 49315b57d14SSatish Balay } else { /* Here a copy is required */ 494ab26458aSBarry Smith if (roworiented) { 495ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 496ab26458aSBarry Smith } else { 497ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 498abef11f7SSatish Balay } 49947513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 50047513183SBarry Smith for (jj=0; jj<bs; jj++) { 50130793edcSSatish Balay *barray++ = *value++; 50247513183SBarry Smith } 50347513183SBarry Smith } 50430793edcSSatish Balay barray -=bs2; 50515b57d14SSatish Balay } 506abef11f7SSatish Balay 507abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 508abef11f7SSatish Balay col = in[j] - cstart; 50993fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 510ab26458aSBarry Smith } 5115ef9f2a5SBarry Smith else if (in[j] < 0) continue; 512aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 513590ac198SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 514ab26458aSBarry Smith #endif 515ab26458aSBarry Smith else { 516ab26458aSBarry Smith if (mat->was_assembled) { 517ab26458aSBarry Smith if (!baij->colmap) { 518ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 519ab26458aSBarry Smith } 520a5eb4965SSatish Balay 521aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 522aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 523fa46199cSSatish Balay { int data; 5240f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 52529bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 526fa46199cSSatish Balay } 52748e59246SSatish Balay #else 52829bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 529a5eb4965SSatish Balay #endif 53048e59246SSatish Balay #endif 531aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 5320f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 533fa46199cSSatish Balay col = (col - 1)/bs; 53448e59246SSatish Balay #else 535a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 53648e59246SSatish Balay #endif 537ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 538ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 539ab26458aSBarry Smith col = in[j]; 540ab26458aSBarry Smith } 541ab26458aSBarry Smith } 542ab26458aSBarry Smith else col = in[j]; 54393fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 544ab26458aSBarry Smith } 545ab26458aSBarry Smith } 546d64ed03dSBarry Smith } else { 547ab26458aSBarry Smith if (!baij->donotstash) { 548ff2fd236SBarry Smith if (roworiented) { 5496fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 550ff2fd236SBarry Smith } else { 5516fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 552ff2fd236SBarry Smith } 553abef11f7SSatish Balay } 554ab26458aSBarry Smith } 555ab26458aSBarry Smith } 5563a40ed3dSBarry Smith PetscFunctionReturn(0); 557ab26458aSBarry Smith } 5586fa18ffdSBarry Smith 5590bdbc534SSatish Balay #define HASH_KEY 0.6180339887 560c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 5616fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 5634a2ae208SSatish Balay #undef __FUNCT__ 5644a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 565f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 5660bdbc534SSatish Balay { 5670bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 568273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 5690bdbc534SSatish Balay int ierr,i,j,row,col; 570273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 571c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 572c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 573329f5518SBarry Smith PetscReal tmp; 5743eda8832SBarry Smith MatScalar **HD = baij->hd,value; 575aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5764a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5774a15367fSSatish Balay #endif 5780bdbc534SSatish Balay 5790bdbc534SSatish Balay PetscFunctionBegin; 5800bdbc534SSatish Balay 5810bdbc534SSatish Balay for (i=0; i<m; i++) { 582aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 58329bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 584590ac198SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 5850bdbc534SSatish Balay #endif 5860bdbc534SSatish Balay row = im[i]; 587c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5880bdbc534SSatish Balay for (j=0; j<n; j++) { 5890bdbc534SSatish Balay col = in[j]; 5906fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5910bdbc534SSatish Balay /* Look up into the Hash Table */ 592c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 593c2760754SSatish Balay h1 = HASH(size,key,tmp); 5940bdbc534SSatish Balay 595c2760754SSatish Balay 596c2760754SSatish Balay idx = h1; 597aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 598187ce0cbSSatish Balay insert_ct++; 599187ce0cbSSatish Balay total_ct++; 600187ce0cbSSatish Balay if (HT[idx] != key) { 601187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 602187ce0cbSSatish Balay if (idx == size) { 603187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 604187ce0cbSSatish Balay if (idx == h1) { 60529bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 606187ce0cbSSatish Balay } 607187ce0cbSSatish Balay } 608187ce0cbSSatish Balay } 609187ce0cbSSatish Balay #else 610c2760754SSatish Balay if (HT[idx] != key) { 611c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 612c2760754SSatish Balay if (idx == size) { 613c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 614c2760754SSatish Balay if (idx == h1) { 61529bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 616c2760754SSatish Balay } 617c2760754SSatish Balay } 618c2760754SSatish Balay } 619187ce0cbSSatish Balay #endif 620c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 621c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 622c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 6230bdbc534SSatish Balay } 6240bdbc534SSatish Balay } else { 6250bdbc534SSatish Balay if (!baij->donotstash) { 626ff2fd236SBarry Smith if (roworiented) { 6278798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 628ff2fd236SBarry Smith } else { 6298798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 6300bdbc534SSatish Balay } 6310bdbc534SSatish Balay } 6320bdbc534SSatish Balay } 6330bdbc534SSatish Balay } 634aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 635187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 636187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 637187ce0cbSSatish Balay #endif 6380bdbc534SSatish Balay PetscFunctionReturn(0); 6390bdbc534SSatish Balay } 6400bdbc534SSatish Balay 6414a2ae208SSatish Balay #undef __FUNCT__ 6424a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 643f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 6440bdbc534SSatish Balay { 6450bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 646273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 6478798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 648273d9f13SBarry Smith int rstart=baij->rstart ; 649b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 650c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 651329f5518SBarry Smith PetscReal tmp; 6523eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 653f15d580aSBarry Smith const MatScalar *v_t,*value; 654aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 6554a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 6564a15367fSSatish Balay #endif 6570bdbc534SSatish Balay 658d0a41580SSatish Balay PetscFunctionBegin; 659d0a41580SSatish Balay 6600bdbc534SSatish Balay if (roworiented) { 6610bdbc534SSatish Balay stepval = (n-1)*bs; 6620bdbc534SSatish Balay } else { 6630bdbc534SSatish Balay stepval = (m-1)*bs; 6640bdbc534SSatish Balay } 6650bdbc534SSatish Balay for (i=0; i<m; i++) { 666aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 667590ac198SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]); 668590ac198SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1); 6690bdbc534SSatish Balay #endif 6700bdbc534SSatish Balay row = im[i]; 671187ce0cbSSatish Balay v_t = v + i*bs2; 672c2760754SSatish Balay if (row >= rstart && row < rend) { 6730bdbc534SSatish Balay for (j=0; j<n; j++) { 6740bdbc534SSatish Balay col = in[j]; 6750bdbc534SSatish Balay 6760bdbc534SSatish Balay /* Look up into the Hash Table */ 677c2760754SSatish Balay key = row*Nbs+col+1; 678c2760754SSatish Balay h1 = HASH(size,key,tmp); 6790bdbc534SSatish Balay 680c2760754SSatish Balay idx = h1; 681aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 682187ce0cbSSatish Balay total_ct++; 683187ce0cbSSatish Balay insert_ct++; 684187ce0cbSSatish Balay if (HT[idx] != key) { 685187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 686187ce0cbSSatish Balay if (idx == size) { 687187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 688187ce0cbSSatish Balay if (idx == h1) { 68929bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 690187ce0cbSSatish Balay } 691187ce0cbSSatish Balay } 692187ce0cbSSatish Balay } 693187ce0cbSSatish Balay #else 694c2760754SSatish Balay if (HT[idx] != key) { 695c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 696c2760754SSatish Balay if (idx == size) { 697c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 698c2760754SSatish Balay if (idx == h1) { 69929bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 700c2760754SSatish Balay } 701c2760754SSatish Balay } 702c2760754SSatish Balay } 703187ce0cbSSatish Balay #endif 704c2760754SSatish Balay baij_a = HD[idx]; 7050bdbc534SSatish Balay if (roworiented) { 706c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 707187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 708187ce0cbSSatish Balay value = v_t; 709187ce0cbSSatish Balay v_t += bs; 710fef45726SSatish Balay if (addv == ADD_VALUES) { 711c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 712c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 713fef45726SSatish Balay baij_a[jj] += *value++; 714b4cc0f5aSSatish Balay } 715b4cc0f5aSSatish Balay } 716fef45726SSatish Balay } else { 717c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 718c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 719fef45726SSatish Balay baij_a[jj] = *value++; 720fef45726SSatish Balay } 721fef45726SSatish Balay } 722fef45726SSatish Balay } 7230bdbc534SSatish Balay } else { 7240bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 725fef45726SSatish Balay if (addv == ADD_VALUES) { 726b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 7270bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 728fef45726SSatish Balay baij_a[jj] += *value++; 729fef45726SSatish Balay } 730fef45726SSatish Balay } 731fef45726SSatish Balay } else { 732fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 733fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 734fef45726SSatish Balay baij_a[jj] = *value++; 735fef45726SSatish Balay } 736b4cc0f5aSSatish Balay } 7370bdbc534SSatish Balay } 7380bdbc534SSatish Balay } 7390bdbc534SSatish Balay } 7400bdbc534SSatish Balay } else { 7410bdbc534SSatish Balay if (!baij->donotstash) { 7420bdbc534SSatish Balay if (roworiented) { 7438798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7440bdbc534SSatish Balay } else { 7458798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7460bdbc534SSatish Balay } 7470bdbc534SSatish Balay } 7480bdbc534SSatish Balay } 7490bdbc534SSatish Balay } 750aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 751187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 752187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 753187ce0cbSSatish Balay #endif 7540bdbc534SSatish Balay PetscFunctionReturn(0); 7550bdbc534SSatish Balay } 756133cdb44SSatish Balay 7574a2ae208SSatish Balay #undef __FUNCT__ 7584a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 759f15d580aSBarry Smith int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 760d6de1c52SSatish Balay { 761d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 762d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 76348e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 764d6de1c52SSatish Balay 765133cdb44SSatish Balay PetscFunctionBegin; 766d6de1c52SSatish Balay for (i=0; i<m; i++) { 767590ac198SBarry Smith if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 768590ac198SBarry Smith if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 769d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 770d6de1c52SSatish Balay row = idxm[i] - bsrstart; 771d6de1c52SSatish Balay for (j=0; j<n; j++) { 772590ac198SBarry Smith if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 773590ac198SBarry Smith if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 774d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 775d6de1c52SSatish Balay col = idxn[j] - bscstart; 77698dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 777d64ed03dSBarry Smith } else { 778905e6a2fSBarry Smith if (!baij->colmap) { 779905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 780905e6a2fSBarry Smith } 781aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7820f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 783fa46199cSSatish Balay data --; 78448e59246SSatish Balay #else 78548e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 78648e59246SSatish Balay #endif 78748e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 788d9d09a02SSatish Balay else { 78948e59246SSatish Balay col = data + idxn[j]%bs; 79098dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791d6de1c52SSatish Balay } 792d6de1c52SSatish Balay } 793d6de1c52SSatish Balay } 794d64ed03dSBarry Smith } else { 79529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 796d6de1c52SSatish Balay } 797d6de1c52SSatish Balay } 7983a40ed3dSBarry Smith PetscFunctionReturn(0); 799d6de1c52SSatish Balay } 800d6de1c52SSatish Balay 8014a2ae208SSatish Balay #undef __FUNCT__ 8024a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 803064f8208SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 804d6de1c52SSatish Balay { 805d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 806d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 807acdf5bf4SSatish Balay int ierr,i,bs2=baij->bs2; 808329f5518SBarry Smith PetscReal sum = 0.0; 8093eda8832SBarry Smith MatScalar *v; 810d6de1c52SSatish Balay 811d64ed03dSBarry Smith PetscFunctionBegin; 812d6de1c52SSatish Balay if (baij->size == 1) { 813064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 814d6de1c52SSatish Balay } else { 815d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 816d6de1c52SSatish Balay v = amat->a; 817d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 818aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 819329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 820d6de1c52SSatish Balay #else 821d6de1c52SSatish Balay sum += (*v)*(*v); v++; 822d6de1c52SSatish Balay #endif 823d6de1c52SSatish Balay } 824d6de1c52SSatish Balay v = bmat->a; 825d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 826aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 827329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 828d6de1c52SSatish Balay #else 829d6de1c52SSatish Balay sum += (*v)*(*v); v++; 830d6de1c52SSatish Balay #endif 831d6de1c52SSatish Balay } 832064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 833064f8208SBarry Smith *nrm = sqrt(*nrm); 834d64ed03dSBarry Smith } else { 83529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836d6de1c52SSatish Balay } 837d64ed03dSBarry Smith } 8383a40ed3dSBarry Smith PetscFunctionReturn(0); 839d6de1c52SSatish Balay } 84057b952d6SSatish Balay 841bd7f49f5SSatish Balay 842fef45726SSatish Balay /* 843fef45726SSatish Balay Creates the hash table, and sets the table 844fef45726SSatish Balay This table is created only once. 845fef45726SSatish Balay If new entried need to be added to the matrix 846fef45726SSatish Balay then the hash table has to be destroyed and 847fef45726SSatish Balay recreated. 848fef45726SSatish Balay */ 8494a2ae208SSatish Balay #undef __FUNCT__ 8504a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 851329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 852596b8d2eSBarry Smith { 853596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 854596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 855596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 8560bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 857549d3d68SSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 858187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859fef45726SSatish Balay int *HT,key; 8603eda8832SBarry Smith MatScalar **HD; 861329f5518SBarry Smith PetscReal tmp; 862aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 8634a15367fSSatish Balay int ct=0,max=0; 8644a15367fSSatish Balay #endif 865fef45726SSatish Balay 866d64ed03dSBarry Smith PetscFunctionBegin; 8670bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8680bdbc534SSatish Balay size = baij->ht_size; 869fef45726SSatish Balay 8700bdbc534SSatish Balay if (baij->ht) { 8710bdbc534SSatish Balay PetscFunctionReturn(0); 872596b8d2eSBarry Smith } 8730bdbc534SSatish Balay 874fef45726SSatish Balay /* Allocate Memory for Hash Table */ 875b0a32e0cSBarry Smith ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 877b9e4cc15SSatish Balay HD = baij->hd; 878a07cd24cSSatish Balay HT = baij->ht; 879b9e4cc15SSatish Balay 880b9e4cc15SSatish Balay 88187828ca2SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8820bdbc534SSatish Balay 883596b8d2eSBarry Smith 884596b8d2eSBarry Smith /* Loop Over A */ 8850bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 886596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8870bdbc534SSatish Balay row = i+rstart; 8880bdbc534SSatish Balay col = aj[j]+cstart; 889596b8d2eSBarry Smith 890187ce0cbSSatish Balay key = row*Nbs + col + 1; 891c2760754SSatish Balay h1 = HASH(size,key,tmp); 8920bdbc534SSatish Balay for (k=0; k<size; k++){ 8930bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8940bdbc534SSatish Balay HT[(h1+k)%size] = key; 8950bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 896596b8d2eSBarry Smith break; 897aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 898187ce0cbSSatish Balay } else { 899187ce0cbSSatish Balay ct++; 900187ce0cbSSatish Balay #endif 901596b8d2eSBarry Smith } 902187ce0cbSSatish Balay } 903aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 904187ce0cbSSatish Balay if (k> max) max = k; 905187ce0cbSSatish Balay #endif 906596b8d2eSBarry Smith } 907596b8d2eSBarry Smith } 908596b8d2eSBarry Smith /* Loop Over B */ 9090bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 910596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9110bdbc534SSatish Balay row = i+rstart; 9120bdbc534SSatish Balay col = garray[bj[j]]; 913187ce0cbSSatish Balay key = row*Nbs + col + 1; 914c2760754SSatish Balay h1 = HASH(size,key,tmp); 9150bdbc534SSatish Balay for (k=0; k<size; k++){ 9160bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 9170bdbc534SSatish Balay HT[(h1+k)%size] = key; 9180bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 919596b8d2eSBarry Smith break; 920aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 921187ce0cbSSatish Balay } else { 922187ce0cbSSatish Balay ct++; 923187ce0cbSSatish Balay #endif 924596b8d2eSBarry Smith } 925187ce0cbSSatish Balay } 926aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 927187ce0cbSSatish Balay if (k> max) max = k; 928187ce0cbSSatish Balay #endif 929596b8d2eSBarry Smith } 930596b8d2eSBarry Smith } 931596b8d2eSBarry Smith 932596b8d2eSBarry Smith /* Print Summary */ 933aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 934c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 935596b8d2eSBarry Smith if (HT[i]) {j++;} 936c38d4ed2SBarry Smith } 937b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 938187ce0cbSSatish Balay #endif 9393a40ed3dSBarry Smith PetscFunctionReturn(0); 940596b8d2eSBarry Smith } 94157b952d6SSatish Balay 9424a2ae208SSatish Balay #undef __FUNCT__ 9434a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945bbb85fb3SSatish Balay { 946bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947ff2fd236SBarry Smith int ierr,nstash,reallocs; 948bbb85fb3SSatish Balay InsertMode addv; 949bbb85fb3SSatish Balay 950bbb85fb3SSatish Balay PetscFunctionBegin; 951bbb85fb3SSatish Balay if (baij->donotstash) { 952bbb85fb3SSatish Balay PetscFunctionReturn(0); 953bbb85fb3SSatish Balay } 954bbb85fb3SSatish Balay 955bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 956bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 957bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 95829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 959bbb85fb3SSatish Balay } 960bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 961bbb85fb3SSatish Balay 9628798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9638798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9648798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 965b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 96646680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 967b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 968bbb85fb3SSatish Balay PetscFunctionReturn(0); 969bbb85fb3SSatish Balay } 970bbb85fb3SSatish Balay 9714a2ae208SSatish Balay #undef __FUNCT__ 9724a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 973bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 974bbb85fb3SSatish Balay { 975bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 976a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 977a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 9787c922b88SBarry Smith int *row,*col,other_disassembled; 9797c922b88SBarry Smith PetscTruth r1,r2,r3; 9803eda8832SBarry Smith MatScalar *val; 981bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 982bbb85fb3SSatish Balay 983bbb85fb3SSatish Balay PetscFunctionBegin; 984bbb85fb3SSatish Balay if (!baij->donotstash) { 985a2d1c673SSatish Balay while (1) { 9868798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 987a2d1c673SSatish Balay if (!flg) break; 988a2d1c673SSatish Balay 989bbb85fb3SSatish Balay for (i=0; i<n;) { 990bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 991bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 992bbb85fb3SSatish Balay if (j < n) ncols = j-i; 993bbb85fb3SSatish Balay else ncols = n-i; 994bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 99593fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 996bbb85fb3SSatish Balay i = j; 997bbb85fb3SSatish Balay } 998bbb85fb3SSatish Balay } 9998798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1000a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1001a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1002a2d1c673SSatish Balay restore the original flags */ 1003a2d1c673SSatish Balay r1 = baij->roworiented; 1004a2d1c673SSatish Balay r2 = a->roworiented; 1005a2d1c673SSatish Balay r3 = b->roworiented; 10067c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10077c922b88SBarry Smith a->roworiented = PETSC_FALSE; 10087c922b88SBarry Smith b->roworiented = PETSC_FALSE; 1009a2d1c673SSatish Balay while (1) { 10108798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1011a2d1c673SSatish Balay if (!flg) break; 1012a2d1c673SSatish Balay 1013a2d1c673SSatish Balay for (i=0; i<n;) { 1014a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1015a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1016a2d1c673SSatish Balay if (j < n) ncols = j-i; 1017a2d1c673SSatish Balay else ncols = n-i; 101893fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1019a2d1c673SSatish Balay i = j; 1020a2d1c673SSatish Balay } 1021a2d1c673SSatish Balay } 10228798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1023a2d1c673SSatish Balay baij->roworiented = r1; 1024a2d1c673SSatish Balay a->roworiented = r2; 1025a2d1c673SSatish Balay b->roworiented = r3; 1026bbb85fb3SSatish Balay } 1027bbb85fb3SSatish Balay 1028bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1029bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1030bbb85fb3SSatish Balay 1031bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1032bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1033bbb85fb3SSatish Balay /* 1034bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1035bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1036bbb85fb3SSatish Balay */ 1037bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1038bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1039bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1040bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1041bbb85fb3SSatish Balay } 1042bbb85fb3SSatish Balay } 1043bbb85fb3SSatish Balay 1044bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1045bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1046bbb85fb3SSatish Balay } 1047bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1048bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1049bbb85fb3SSatish Balay 1050aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1051bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1052f6275e2eSBarry Smith PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1053bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1054bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1055bbb85fb3SSatish Balay } 1056bbb85fb3SSatish Balay #endif 1057bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1058bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1059bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1060bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1061bbb85fb3SSatish Balay } 1062bbb85fb3SSatish Balay 1063606d414cSSatish Balay if (baij->rowvalues) { 1064606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1065606d414cSSatish Balay baij->rowvalues = 0; 1066606d414cSSatish Balay } 1067bbb85fb3SSatish Balay PetscFunctionReturn(0); 1068bbb85fb3SSatish Balay } 106957b952d6SSatish Balay 10704a2ae208SSatish Balay #undef __FUNCT__ 10714a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1072b0a32e0cSBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 107357b952d6SSatish Balay { 107457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1075fb9695e5SSatish Balay int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 10766831982aSBarry Smith PetscTruth isascii,isdraw; 1077b0a32e0cSBarry Smith PetscViewer sviewer; 1078f3ef73ceSBarry Smith PetscViewerFormat format; 107957b952d6SSatish Balay 1080d64ed03dSBarry Smith PetscFunctionBegin; 1081f81bd7ccSHong Zhang /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1082b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1083fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 10840f5bd95cSBarry Smith if (isascii) { 1085b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1086456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 10874e220ebcSLois Curfman McInnes MatInfo info; 1088d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1089d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1090b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1091273d9f13SBarry Smith rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10926831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 1093d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1094b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1095d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1096b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1097b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 109857b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10993a40ed3dSBarry Smith PetscFunctionReturn(0); 1100fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1101b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 11023a40ed3dSBarry Smith PetscFunctionReturn(0); 110304929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 110404929863SHong Zhang PetscFunctionReturn(0); 110557b952d6SSatish Balay } 110657b952d6SSatish Balay } 110757b952d6SSatish Balay 11080f5bd95cSBarry Smith if (isdraw) { 1109b0a32e0cSBarry Smith PetscDraw draw; 111057b952d6SSatish Balay PetscTruth isnull; 1111b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1112b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 111357b952d6SSatish Balay } 111457b952d6SSatish Balay 111557b952d6SSatish Balay if (size == 1) { 1116873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 111757b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1118d64ed03dSBarry Smith } else { 111957b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 112057b952d6SSatish Balay Mat A; 112157b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1122273d9f13SBarry Smith int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11233eda8832SBarry Smith MatScalar *a; 112457b952d6SSatish Balay 112557b952d6SSatish Balay if (!rank) { 112655843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1127d64ed03dSBarry Smith } else { 112855843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 112957b952d6SSatish Balay } 1130b0a32e0cSBarry Smith PetscLogObjectParent(mat,A); 113157b952d6SSatish Balay 113257b952d6SSatish Balay /* copy over the A part */ 113357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 113457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 113582502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 113657b952d6SSatish Balay 113757b952d6SSatish Balay for (i=0; i<mbs; i++) { 113857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 113957b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 114057b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 114157b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 114257b952d6SSatish Balay for (k=0; k<bs; k++) { 114393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1144cee3aa6bSSatish Balay col++; a += bs; 114557b952d6SSatish Balay } 114657b952d6SSatish Balay } 114757b952d6SSatish Balay } 114857b952d6SSatish Balay /* copy over the B part */ 114957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 115057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 115157b952d6SSatish Balay for (i=0; i<mbs; i++) { 115257b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 115357b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 115457b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 115557b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 115657b952d6SSatish Balay for (k=0; k<bs; k++) { 115793fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1158cee3aa6bSSatish Balay col++; a += bs; 115957b952d6SSatish Balay } 116057b952d6SSatish Balay } 116157b952d6SSatish Balay } 1162606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11636d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11646d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 116555843e3eSBarry Smith /* 116655843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1167b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 116855843e3eSBarry Smith */ 1169b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1170f14a1c24SBarry Smith if (!rank) { 1171e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11726831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 117357b952d6SSatish Balay } 1174b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 117557b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 117657b952d6SSatish Balay } 11773a40ed3dSBarry Smith PetscFunctionReturn(0); 117857b952d6SSatish Balay } 117957b952d6SSatish Balay 11804a2ae208SSatish Balay #undef __FUNCT__ 11814a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1182b0a32e0cSBarry Smith int MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 118357b952d6SSatish Balay { 118457b952d6SSatish Balay int ierr; 11856831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 118657b952d6SSatish Balay 1187d64ed03dSBarry Smith PetscFunctionBegin; 1188b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1189fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1190b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1191fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 11920f5bd95cSBarry Smith if (isascii || isdraw || issocket || isbinary) { 11937b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 11945cd90555SBarry Smith } else { 119529bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 119657b952d6SSatish Balay } 11973a40ed3dSBarry Smith PetscFunctionReturn(0); 119857b952d6SSatish Balay } 119957b952d6SSatish Balay 12004a2ae208SSatish Balay #undef __FUNCT__ 12014a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1202e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 120379bdfe76SSatish Balay { 120479bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 120579bdfe76SSatish Balay int ierr; 120679bdfe76SSatish Balay 1207d64ed03dSBarry Smith PetscFunctionBegin; 1208aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1209b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 121079bdfe76SSatish Balay #endif 12118798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12128798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1213606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 121479bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 121579bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1216aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12170f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 121848e59246SSatish Balay #else 1219606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 122048e59246SSatish Balay #endif 1221606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1222606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1223606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1224606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1225606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1226606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 12276fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 12286fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 12296fa18ffdSBarry Smith #endif 1230606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 12313a40ed3dSBarry Smith PetscFunctionReturn(0); 123279bdfe76SSatish Balay } 123379bdfe76SSatish Balay 12344a2ae208SSatish Balay #undef __FUNCT__ 12354a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1236ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1237cee3aa6bSSatish Balay { 1238cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 123947b4a8eaSLois Curfman McInnes int ierr,nt; 1240cee3aa6bSSatish Balay 1241d64ed03dSBarry Smith PetscFunctionBegin; 1242e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1243273d9f13SBarry Smith if (nt != A->n) { 124429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 124547b4a8eaSLois Curfman McInnes } 1246e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1247273d9f13SBarry Smith if (nt != A->m) { 124829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 124947b4a8eaSLois Curfman McInnes } 125043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1251f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 125243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1253f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 125443a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12553a40ed3dSBarry Smith PetscFunctionReturn(0); 1256cee3aa6bSSatish Balay } 1257cee3aa6bSSatish Balay 12584a2ae208SSatish Balay #undef __FUNCT__ 12594a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1260ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1261cee3aa6bSSatish Balay { 1262cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1263cee3aa6bSSatish Balay int ierr; 1264d64ed03dSBarry Smith 1265d64ed03dSBarry Smith PetscFunctionBegin; 126643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1267f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 126843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1269f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12703a40ed3dSBarry Smith PetscFunctionReturn(0); 1271cee3aa6bSSatish Balay } 1272cee3aa6bSSatish Balay 12734a2ae208SSatish Balay #undef __FUNCT__ 12744a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 12757c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1276cee3aa6bSSatish Balay { 1277cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1278cee3aa6bSSatish Balay int ierr; 1279cee3aa6bSSatish Balay 1280d64ed03dSBarry Smith PetscFunctionBegin; 1281cee3aa6bSSatish Balay /* do nondiagonal part */ 12827c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1283cee3aa6bSSatish Balay /* send it on its way */ 1284537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1285cee3aa6bSSatish Balay /* do local part */ 12867c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1287cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1288cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1289cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1290639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12913a40ed3dSBarry Smith PetscFunctionReturn(0); 1292cee3aa6bSSatish Balay } 1293cee3aa6bSSatish Balay 12944a2ae208SSatish Balay #undef __FUNCT__ 12954a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 12967c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1297cee3aa6bSSatish Balay { 1298cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1299cee3aa6bSSatish Balay int ierr; 1300cee3aa6bSSatish Balay 1301d64ed03dSBarry Smith PetscFunctionBegin; 1302cee3aa6bSSatish Balay /* do nondiagonal part */ 13037c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1304cee3aa6bSSatish Balay /* send it on its way */ 1305537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1306cee3aa6bSSatish Balay /* do local part */ 13077c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1308cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1309cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1310cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1311537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13123a40ed3dSBarry Smith PetscFunctionReturn(0); 1313cee3aa6bSSatish Balay } 1314cee3aa6bSSatish Balay 1315cee3aa6bSSatish Balay /* 1316cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1317cee3aa6bSSatish Balay diagonal block 1318cee3aa6bSSatish Balay */ 13194a2ae208SSatish Balay #undef __FUNCT__ 13204a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1321ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1322cee3aa6bSSatish Balay { 1323cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 13243a40ed3dSBarry Smith int ierr; 1325d64ed03dSBarry Smith 1326d64ed03dSBarry Smith PetscFunctionBegin; 1327273d9f13SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13283a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13293a40ed3dSBarry Smith PetscFunctionReturn(0); 1330cee3aa6bSSatish Balay } 1331cee3aa6bSSatish Balay 13324a2ae208SSatish Balay #undef __FUNCT__ 13334a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1334268466fbSBarry Smith int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1335cee3aa6bSSatish Balay { 1336cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1337cee3aa6bSSatish Balay int ierr; 1338d64ed03dSBarry Smith 1339d64ed03dSBarry Smith PetscFunctionBegin; 1340cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1341cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 13423a40ed3dSBarry Smith PetscFunctionReturn(0); 1343cee3aa6bSSatish Balay } 1344026e39d0SSatish Balay 13454a2ae208SSatish Balay #undef __FUNCT__ 13464a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 134787828ca2SBarry Smith int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1348acdf5bf4SSatish Balay { 1349acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 135087828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1351acdf5bf4SSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1352d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1353d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1354acdf5bf4SSatish Balay 1355d64ed03dSBarry Smith PetscFunctionBegin; 135629bbc08cSBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1357acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1358acdf5bf4SSatish Balay 1359acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1360acdf5bf4SSatish Balay /* 1361acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1362acdf5bf4SSatish Balay */ 1363acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1364bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1365bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1366acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1367acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1368acdf5bf4SSatish Balay } 136987828ca2SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1370acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1371acdf5bf4SSatish Balay } 1372acdf5bf4SSatish Balay 137329bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1374d9d09a02SSatish Balay lrow = row - brstart; 1375acdf5bf4SSatish Balay 1376acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1377acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1378acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1379f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1380f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1381acdf5bf4SSatish Balay nztot = nzA + nzB; 1382acdf5bf4SSatish Balay 1383acdf5bf4SSatish Balay cmap = mat->garray; 1384acdf5bf4SSatish Balay if (v || idx) { 1385acdf5bf4SSatish Balay if (nztot) { 1386acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1387acdf5bf4SSatish Balay int imark = -1; 1388acdf5bf4SSatish Balay if (v) { 1389acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1390acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1391d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1392acdf5bf4SSatish Balay else break; 1393acdf5bf4SSatish Balay } 1394acdf5bf4SSatish Balay imark = i; 1395acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1396acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1397acdf5bf4SSatish Balay } 1398acdf5bf4SSatish Balay if (idx) { 1399acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1400acdf5bf4SSatish Balay if (imark > -1) { 1401acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1402bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1403acdf5bf4SSatish Balay } 1404acdf5bf4SSatish Balay } else { 1405acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1406d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1407d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1408acdf5bf4SSatish Balay else break; 1409acdf5bf4SSatish Balay } 1410acdf5bf4SSatish Balay imark = i; 1411acdf5bf4SSatish Balay } 1412d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1413d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1414acdf5bf4SSatish Balay } 1415d64ed03dSBarry Smith } else { 1416d212a18eSSatish Balay if (idx) *idx = 0; 1417d212a18eSSatish Balay if (v) *v = 0; 1418d212a18eSSatish Balay } 1419acdf5bf4SSatish Balay } 1420acdf5bf4SSatish Balay *nz = nztot; 1421f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1422f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14233a40ed3dSBarry Smith PetscFunctionReturn(0); 1424acdf5bf4SSatish Balay } 1425acdf5bf4SSatish Balay 14264a2ae208SSatish Balay #undef __FUNCT__ 14274a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 142887828ca2SBarry Smith int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1429acdf5bf4SSatish Balay { 1430acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1431d64ed03dSBarry Smith 1432d64ed03dSBarry Smith PetscFunctionBegin; 1433acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 143429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1435acdf5bf4SSatish Balay } 1436acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14373a40ed3dSBarry Smith PetscFunctionReturn(0); 1438acdf5bf4SSatish Balay } 1439acdf5bf4SSatish Balay 14404a2ae208SSatish Balay #undef __FUNCT__ 14414a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1442ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14435a838052SSatish Balay { 14445a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1445d64ed03dSBarry Smith 1446d64ed03dSBarry Smith PetscFunctionBegin; 14475a838052SSatish Balay *bs = baij->bs; 14483a40ed3dSBarry Smith PetscFunctionReturn(0); 14495a838052SSatish Balay } 14505a838052SSatish Balay 14514a2ae208SSatish Balay #undef __FUNCT__ 14524a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1453ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 145458667388SSatish Balay { 145558667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 145658667388SSatish Balay int ierr; 1457d64ed03dSBarry Smith 1458d64ed03dSBarry Smith PetscFunctionBegin; 145958667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 146058667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14613a40ed3dSBarry Smith PetscFunctionReturn(0); 146258667388SSatish Balay } 14630ac07820SSatish Balay 14644a2ae208SSatish Balay #undef __FUNCT__ 14654a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1466ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14670ac07820SSatish Balay { 14684e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14694e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 14707d57db60SLois Curfman McInnes int ierr; 1471329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14720ac07820SSatish Balay 1473d64ed03dSBarry Smith PetscFunctionBegin; 1474f6275e2eSBarry Smith info->block_size = (PetscReal)a->bs; 14754e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14760e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1477de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14784e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14790e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1480de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14810ac07820SSatish Balay if (flag == MAT_LOCAL) { 14824e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14834e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14844e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14854e220ebcSLois Curfman McInnes info->memory = isend[3]; 14864e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14870ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1488d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 14894e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14904e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14914e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14924e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14934e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14940ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1495d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 14964e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14974e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14984e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14994e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15004e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1501d41123aaSBarry Smith } else { 150229bbc08cSBarry Smith SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 15030ac07820SSatish Balay } 1504f6275e2eSBarry Smith info->rows_global = (PetscReal)A->M; 1505f6275e2eSBarry Smith info->columns_global = (PetscReal)A->N; 1506f6275e2eSBarry Smith info->rows_local = (PetscReal)A->m; 1507f6275e2eSBarry Smith info->columns_local = (PetscReal)A->N; 15084e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15094e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15104e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15113a40ed3dSBarry Smith PetscFunctionReturn(0); 15120ac07820SSatish Balay } 15130ac07820SSatish Balay 15144a2ae208SSatish Balay #undef __FUNCT__ 15154a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1516ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 151758667388SSatish Balay { 151858667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 151998305bb5SBarry Smith int ierr; 152058667388SSatish Balay 1521d64ed03dSBarry Smith PetscFunctionBegin; 152212c028f9SKris Buschelman switch (op) { 152312c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 152412c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 152512c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 152612c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 152712c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 152812c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 152912c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 153098305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 153198305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 153212c028f9SKris Buschelman break; 153312c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15347c922b88SBarry Smith a->roworiented = PETSC_TRUE; 153598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 153698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 153712c028f9SKris Buschelman break; 153812c028f9SKris Buschelman case MAT_ROWS_SORTED: 153912c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 154012c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 1541b0a32e0cSBarry Smith PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 154212c028f9SKris Buschelman break; 154312c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15447c922b88SBarry Smith a->roworiented = PETSC_FALSE; 154598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 154712c028f9SKris Buschelman break; 154812c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15497c922b88SBarry Smith a->donotstash = PETSC_TRUE; 155012c028f9SKris Buschelman break; 155112c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 155229bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 155312c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15547c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 155512c028f9SKris Buschelman break; 155612c028f9SKris Buschelman default: 155729bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 1558d64ed03dSBarry Smith } 15593a40ed3dSBarry Smith PetscFunctionReturn(0); 156058667388SSatish Balay } 156158667388SSatish Balay 15624a2ae208SSatish Balay #undef __FUNCT__ 15634a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1564ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15650ac07820SSatish Balay { 15660ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15670ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15680ac07820SSatish Balay Mat B; 1569273d9f13SBarry Smith int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 15700ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15713eda8832SBarry Smith MatScalar *a; 15720ac07820SSatish Balay 1573d64ed03dSBarry Smith PetscFunctionBegin; 157429bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1575273d9f13SBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 15760ac07820SSatish Balay 15770ac07820SSatish Balay /* copy over the A part */ 15780ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15790ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 158082502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 15810ac07820SSatish Balay 15820ac07820SSatish Balay for (i=0; i<mbs; i++) { 15830ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15840ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15850ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15860ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15870ac07820SSatish Balay for (k=0; k<bs; k++) { 158893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15890ac07820SSatish Balay col++; a += bs; 15900ac07820SSatish Balay } 15910ac07820SSatish Balay } 15920ac07820SSatish Balay } 15930ac07820SSatish Balay /* copy over the B part */ 15940ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 15950ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15960ac07820SSatish Balay for (i=0; i<mbs; i++) { 15970ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15980ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15990ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16000ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16010ac07820SSatish Balay for (k=0; k<bs; k++) { 160293fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16030ac07820SSatish Balay col++; a += bs; 16040ac07820SSatish Balay } 16050ac07820SSatish Balay } 16060ac07820SSatish Balay } 1607606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16080ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16090ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16100ac07820SSatish Balay 16117c922b88SBarry Smith if (matout) { 16120ac07820SSatish Balay *matout = B; 16130ac07820SSatish Balay } else { 1614273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16150ac07820SSatish Balay } 16163a40ed3dSBarry Smith PetscFunctionReturn(0); 16170ac07820SSatish Balay } 16180e95ebc0SSatish Balay 16194a2ae208SSatish Balay #undef __FUNCT__ 16204a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 162136c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16220e95ebc0SSatish Balay { 162336c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 162436c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 16250e95ebc0SSatish Balay int ierr,s1,s2,s3; 16260e95ebc0SSatish Balay 1627d64ed03dSBarry Smith PetscFunctionBegin; 162836c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 162936c4a09eSSatish Balay if (rr) { 163036c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 163129bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 163236c4a09eSSatish Balay /* Overlap communication with computation. */ 163336c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 163436c4a09eSSatish Balay } 16350e95ebc0SSatish Balay if (ll) { 16360e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 163729bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1638a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16390e95ebc0SSatish Balay } 164036c4a09eSSatish Balay /* scale the diagonal block */ 164136c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 164236c4a09eSSatish Balay 164336c4a09eSSatish Balay if (rr) { 164436c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 164536c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1646a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 164736c4a09eSSatish Balay } 164836c4a09eSSatish Balay 16493a40ed3dSBarry Smith PetscFunctionReturn(0); 16500e95ebc0SSatish Balay } 16510e95ebc0SSatish Balay 16524a2ae208SSatish Balay #undef __FUNCT__ 16534a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1654268466fbSBarry Smith int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 16550ac07820SSatish Balay { 16560ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16570ac07820SSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1658c1dc657dSBarry Smith int *nprocs,j,idx,nsends,row; 16590ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16600ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1661a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16620ac07820SSatish Balay MPI_Comm comm = A->comm; 16630ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16640ac07820SSatish Balay MPI_Status recv_status,*send_status; 16650ac07820SSatish Balay IS istmp; 166635d8aa7fSBarry Smith PetscTruth found; 16670ac07820SSatish Balay 1668d64ed03dSBarry Smith PetscFunctionBegin; 1669f14a1c24SBarry Smith ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 16700ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 16710ac07820SSatish Balay 16720ac07820SSatish Balay /* first count number of contributors to each processor */ 167382502324SSatish Balay ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1674549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1675b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 16760ac07820SSatish Balay for (i=0; i<N; i++) { 16770ac07820SSatish Balay idx = rows[i]; 167835d8aa7fSBarry Smith found = PETSC_FALSE; 16790ac07820SSatish Balay for (j=0; j<size; j++) { 16800ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1681c1dc657dSBarry Smith nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 16820ac07820SSatish Balay } 16830ac07820SSatish Balay } 168429bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 16850ac07820SSatish Balay } 1686c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 16870ac07820SSatish Balay 16880ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1689c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 16900ac07820SSatish Balay 16910ac07820SSatish Balay /* post receives: */ 1692b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1693b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 16940ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1695ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 16960ac07820SSatish Balay } 16970ac07820SSatish Balay 16980ac07820SSatish Balay /* do sends: 16990ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17000ac07820SSatish Balay the ith processor 17010ac07820SSatish Balay */ 1702b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1703b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1704b0a32e0cSBarry Smith ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 17050ac07820SSatish Balay starts[0] = 0; 1706c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17070ac07820SSatish Balay for (i=0; i<N; i++) { 17080ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17090ac07820SSatish Balay } 17106831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17110ac07820SSatish Balay 17120ac07820SSatish Balay starts[0] = 0; 1713c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17140ac07820SSatish Balay count = 0; 17150ac07820SSatish Balay for (i=0; i<size; i++) { 1716c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1717c1dc657dSBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17180ac07820SSatish Balay } 17190ac07820SSatish Balay } 1720606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17210ac07820SSatish Balay 17220ac07820SSatish Balay base = owners[rank]*bs; 17230ac07820SSatish Balay 17240ac07820SSatish Balay /* wait on receives */ 1725b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 17260ac07820SSatish Balay source = lens + nrecvs; 17270ac07820SSatish Balay count = nrecvs; slen = 0; 17280ac07820SSatish Balay while (count) { 1729ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17300ac07820SSatish Balay /* unpack receives into our local space */ 1731ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17320ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17330ac07820SSatish Balay lens[imdex] = n; 17340ac07820SSatish Balay slen += n; 17350ac07820SSatish Balay count--; 17360ac07820SSatish Balay } 1737606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17380ac07820SSatish Balay 17390ac07820SSatish Balay /* move the data into the send scatter */ 1740b0a32e0cSBarry Smith ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 17410ac07820SSatish Balay count = 0; 17420ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17430ac07820SSatish Balay values = rvalues + i*nmax; 17440ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17450ac07820SSatish Balay lrows[count++] = values[j] - base; 17460ac07820SSatish Balay } 17470ac07820SSatish Balay } 1748606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1749606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1750606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1751606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17520ac07820SSatish Balay 17530ac07820SSatish Balay /* actually zap the local rows */ 1754029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1755b0a32e0cSBarry Smith PetscLogObjectParent(A,istmp); 1756a07cd24cSSatish Balay 175772dacd9aSBarry Smith /* 175872dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 175972dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 176072dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 176172dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 176272dacd9aSBarry Smith 176372dacd9aSBarry Smith Contributed by: Mathew Knepley 176472dacd9aSBarry Smith */ 17659c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 17666fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 17679c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 17686fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 17699c957beeSSatish Balay } else if (diag) { 17706fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1771fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 177229bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1773fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 17746525c446SSatish Balay } 1775a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1776a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 17773f11ea53SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1778a07cd24cSSatish Balay } 1779a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1780a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17819c957beeSSatish Balay } else { 17826fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1783a07cd24cSSatish Balay } 17849c957beeSSatish Balay 17859c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1786606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1787a07cd24cSSatish Balay 17880ac07820SSatish Balay /* wait on sends */ 17890ac07820SSatish Balay if (nsends) { 179082502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1791ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1792606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 17930ac07820SSatish Balay } 1794606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1795606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 17960ac07820SSatish Balay 17973a40ed3dSBarry Smith PetscFunctionReturn(0); 17980ac07820SSatish Balay } 179972dacd9aSBarry Smith 18004a2ae208SSatish Balay #undef __FUNCT__ 18014a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1802ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1803ba4ca20aSSatish Balay { 1804ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 180525fdafccSSatish Balay MPI_Comm comm = A->comm; 1806133cdb44SSatish Balay static int called = 0; 18073a40ed3dSBarry Smith int ierr; 1808ba4ca20aSSatish Balay 1809d64ed03dSBarry Smith PetscFunctionBegin; 18103a40ed3dSBarry Smith if (!a->rank) { 18113a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 181225fdafccSSatish Balay } 181325fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1814d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1815d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18163a40ed3dSBarry Smith PetscFunctionReturn(0); 1817ba4ca20aSSatish Balay } 18180ac07820SSatish Balay 18194a2ae208SSatish Balay #undef __FUNCT__ 18204a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1821ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1822bb5a7306SBarry Smith { 1823bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1824bb5a7306SBarry Smith int ierr; 1825d64ed03dSBarry Smith 1826d64ed03dSBarry Smith PetscFunctionBegin; 1827bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18283a40ed3dSBarry Smith PetscFunctionReturn(0); 1829bb5a7306SBarry Smith } 1830bb5a7306SBarry Smith 18312e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18320ac07820SSatish Balay 18334a2ae208SSatish Balay #undef __FUNCT__ 18344a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 18357fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18367fc3c18eSBarry Smith { 18377fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18387fc3c18eSBarry Smith Mat a,b,c,d; 18397fc3c18eSBarry Smith PetscTruth flg; 18407fc3c18eSBarry Smith int ierr; 18417fc3c18eSBarry Smith 18427fc3c18eSBarry Smith PetscFunctionBegin; 18437fc3c18eSBarry Smith a = matA->A; b = matA->B; 18447fc3c18eSBarry Smith c = matB->A; d = matB->B; 18457fc3c18eSBarry Smith 18467fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18477fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18487fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18497fc3c18eSBarry Smith } 18507fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18517fc3c18eSBarry Smith PetscFunctionReturn(0); 18527fc3c18eSBarry Smith } 18537fc3c18eSBarry Smith 1854273d9f13SBarry Smith 18554a2ae208SSatish Balay #undef __FUNCT__ 18564a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1857273d9f13SBarry Smith int MatSetUpPreallocation_MPIBAIJ(Mat A) 1858273d9f13SBarry Smith { 1859273d9f13SBarry Smith int ierr; 1860273d9f13SBarry Smith 1861273d9f13SBarry Smith PetscFunctionBegin; 1862273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1863273d9f13SBarry Smith PetscFunctionReturn(0); 1864273d9f13SBarry Smith } 1865273d9f13SBarry Smith 186679bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1867cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1868cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1869cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1870cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1871cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1872cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 18737c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 18747c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1875cc2dc46cSBarry Smith 0, 1876cc2dc46cSBarry Smith 0, 1877cc2dc46cSBarry Smith 0, 1878cc2dc46cSBarry Smith 0, 1879cc2dc46cSBarry Smith 0, 1880cc2dc46cSBarry Smith 0, 1881cc2dc46cSBarry Smith 0, 1882cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1883cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 18847fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1885cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1886cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1887cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1888cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1889cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1890cc2dc46cSBarry Smith 0, 1891cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1892cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1893cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1894cc2dc46cSBarry Smith 0, 1895cc2dc46cSBarry Smith 0, 1896cc2dc46cSBarry Smith 0, 1897cc2dc46cSBarry Smith 0, 1898273d9f13SBarry Smith MatSetUpPreallocation_MPIBAIJ, 1899273d9f13SBarry Smith 0, 1900cc2dc46cSBarry Smith 0, 1901cc2dc46cSBarry Smith 0, 1902cc2dc46cSBarry Smith 0, 19032e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1904cc2dc46cSBarry Smith 0, 1905cc2dc46cSBarry Smith 0, 1906cc2dc46cSBarry Smith 0, 1907cc2dc46cSBarry Smith 0, 1908cc2dc46cSBarry Smith 0, 1909cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1910cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1911cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1912cc2dc46cSBarry Smith 0, 1913cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1914cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1915cc2dc46cSBarry Smith 0, 1916cc2dc46cSBarry Smith 0, 1917cc2dc46cSBarry Smith 0, 1918cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1919cc2dc46cSBarry Smith 0, 1920cc2dc46cSBarry Smith 0, 1921cc2dc46cSBarry Smith 0, 1922cc2dc46cSBarry Smith 0, 1923cc2dc46cSBarry Smith 0, 1924cc2dc46cSBarry Smith 0, 1925cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1926cc2dc46cSBarry Smith 0, 1927cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1928cc2dc46cSBarry Smith 0, 1929f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 1930f14a1c24SBarry Smith MatView_MPIBAIJ, 19318a124369SBarry Smith MatGetPetscMaps_Petsc, 19327843d17aSBarry Smith 0, 19337843d17aSBarry Smith 0, 19347843d17aSBarry Smith 0, 19357843d17aSBarry Smith 0, 19367843d17aSBarry Smith 0, 19377843d17aSBarry Smith 0, 19387843d17aSBarry Smith MatGetRowMax_MPIBAIJ}; 193979bdfe76SSatish Balay 19405ef9f2a5SBarry Smith 1941e18c124aSSatish Balay EXTERN_C_BEGIN 19424a2ae208SSatish Balay #undef __FUNCT__ 19434a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 19445ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19455ef9f2a5SBarry Smith { 19465ef9f2a5SBarry Smith PetscFunctionBegin; 19475ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19485ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19495ef9f2a5SBarry Smith PetscFunctionReturn(0); 19505ef9f2a5SBarry Smith } 1951e18c124aSSatish Balay EXTERN_C_END 195279bdfe76SSatish Balay 1953273d9f13SBarry Smith EXTERN_C_BEGIN 19544a2ae208SSatish Balay #undef __FUNCT__ 1955a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 1956a23d5eceSKris Buschelman int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1957a23d5eceSKris Buschelman { 1958a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 1959a23d5eceSKris Buschelman int ierr,i; 1960a23d5eceSKris Buschelman 1961a23d5eceSKris Buschelman PetscFunctionBegin; 1962a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 1963a23d5eceSKris Buschelman ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1964a23d5eceSKris Buschelman 1965a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1966a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1967a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1968a23d5eceSKris Buschelman if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1969a23d5eceSKris Buschelman if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1970a23d5eceSKris Buschelman if (d_nnz) { 1971a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 1972a23d5eceSKris Buschelman if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1973a23d5eceSKris Buschelman } 1974a23d5eceSKris Buschelman } 1975a23d5eceSKris Buschelman if (o_nnz) { 1976a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 1977a23d5eceSKris Buschelman if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1978a23d5eceSKris Buschelman } 1979a23d5eceSKris Buschelman } 1980a23d5eceSKris Buschelman 1981a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1982a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1983a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1984a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1985a23d5eceSKris Buschelman 1986a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 1987a23d5eceSKris Buschelman b->bs = bs; 1988a23d5eceSKris Buschelman b->bs2 = bs*bs; 1989a23d5eceSKris Buschelman b->mbs = B->m/bs; 1990a23d5eceSKris Buschelman b->nbs = B->n/bs; 1991a23d5eceSKris Buschelman b->Mbs = B->M/bs; 1992a23d5eceSKris Buschelman b->Nbs = B->N/bs; 1993a23d5eceSKris Buschelman 1994a23d5eceSKris Buschelman ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1995a23d5eceSKris Buschelman b->rowners[0] = 0; 1996a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 1997a23d5eceSKris Buschelman b->rowners[i] += b->rowners[i-1]; 1998a23d5eceSKris Buschelman } 1999a23d5eceSKris Buschelman b->rstart = b->rowners[b->rank]; 2000a23d5eceSKris Buschelman b->rend = b->rowners[b->rank+1]; 2001a23d5eceSKris Buschelman 2002a23d5eceSKris Buschelman ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2003a23d5eceSKris Buschelman b->cowners[0] = 0; 2004a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2005a23d5eceSKris Buschelman b->cowners[i] += b->cowners[i-1]; 2006a23d5eceSKris Buschelman } 2007a23d5eceSKris Buschelman b->cstart = b->cowners[b->rank]; 2008a23d5eceSKris Buschelman b->cend = b->cowners[b->rank+1]; 2009a23d5eceSKris Buschelman 2010a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2011a23d5eceSKris Buschelman b->rowners_bs[i] = b->rowners[i]*bs; 2012a23d5eceSKris Buschelman } 2013a23d5eceSKris Buschelman b->rstart_bs = b->rstart*bs; 2014a23d5eceSKris Buschelman b->rend_bs = b->rend*bs; 2015a23d5eceSKris Buschelman b->cstart_bs = b->cstart*bs; 2016a23d5eceSKris Buschelman b->cend_bs = b->cend*bs; 2017a23d5eceSKris Buschelman 2018a23d5eceSKris Buschelman ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2019a23d5eceSKris Buschelman PetscLogObjectParent(B,b->A); 2020a23d5eceSKris Buschelman ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2021a23d5eceSKris Buschelman PetscLogObjectParent(B,b->B); 2022a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2023a23d5eceSKris Buschelman 2024a23d5eceSKris Buschelman PetscFunctionReturn(0); 2025a23d5eceSKris Buschelman } 2026a23d5eceSKris Buschelman EXTERN_C_END 2027a23d5eceSKris Buschelman 2028a23d5eceSKris Buschelman EXTERN_C_BEGIN 202992b32695SKris Buschelman EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 20305bf65638SKris Buschelman EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 203192b32695SKris Buschelman EXTERN_C_END 20325bf65638SKris Buschelman 203392b32695SKris Buschelman EXTERN_C_BEGIN 2034a23d5eceSKris Buschelman #undef __FUNCT__ 20354a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2036273d9f13SBarry Smith int MatCreate_MPIBAIJ(Mat B) 2037273d9f13SBarry Smith { 2038273d9f13SBarry Smith Mat_MPIBAIJ *b; 2039cfce73b9SSatish Balay int ierr; 2040273d9f13SBarry Smith PetscTruth flg; 2041273d9f13SBarry Smith 2042273d9f13SBarry Smith PetscFunctionBegin; 2043273d9f13SBarry Smith 204482502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 204582502324SSatish Balay B->data = (void*)b; 204682502324SSatish Balay 2047273d9f13SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2048273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2049273d9f13SBarry Smith B->mapping = 0; 2050273d9f13SBarry Smith B->factor = 0; 2051273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2052273d9f13SBarry Smith 2053273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2054273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2055273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2056273d9f13SBarry Smith 2057273d9f13SBarry Smith /* build local table of row and column ownerships */ 205882502324SSatish Balay ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2059b0a32e0cSBarry Smith PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2060273d9f13SBarry Smith b->cowners = b->rowners + b->size + 2; 2061273d9f13SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2062273d9f13SBarry Smith 2063273d9f13SBarry Smith /* build cache for off array entries formed */ 2064273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2065273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2066273d9f13SBarry Smith b->colmap = PETSC_NULL; 2067273d9f13SBarry Smith b->garray = PETSC_NULL; 2068273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2069273d9f13SBarry Smith 2070cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2071273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 207264a35ccbSBarry Smith b->setvalueslen = 0; 2073273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2074273d9f13SBarry Smith #endif 2075273d9f13SBarry Smith 2076273d9f13SBarry Smith /* stuff used in block assembly */ 2077273d9f13SBarry Smith b->barray = 0; 2078273d9f13SBarry Smith 2079273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2080273d9f13SBarry Smith b->lvec = 0; 2081273d9f13SBarry Smith b->Mvctx = 0; 2082273d9f13SBarry Smith 2083273d9f13SBarry Smith /* stuff for MatGetRow() */ 2084273d9f13SBarry Smith b->rowindices = 0; 2085273d9f13SBarry Smith b->rowvalues = 0; 2086273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2087273d9f13SBarry Smith 2088273d9f13SBarry Smith /* hash table stuff */ 2089273d9f13SBarry Smith b->ht = 0; 2090273d9f13SBarry Smith b->hd = 0; 2091273d9f13SBarry Smith b->ht_size = 0; 2092273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2093273d9f13SBarry Smith b->ht_fact = 0; 2094273d9f13SBarry Smith b->ht_total_ct = 0; 2095273d9f13SBarry Smith b->ht_insert_ct = 0; 2096273d9f13SBarry Smith 2097b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2098273d9f13SBarry Smith if (flg) { 2099f6275e2eSBarry Smith PetscReal fact = 1.39; 2100273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 210187828ca2SBarry Smith ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2102273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2103273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2104b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2105273d9f13SBarry Smith } 2106273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2107273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2108273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2109273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2110273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2111273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2112273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2113273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2114273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2115a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2116a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2117a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 211892b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 211992b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 212092b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 21215bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 21225bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 21235bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2124273d9f13SBarry Smith PetscFunctionReturn(0); 2125273d9f13SBarry Smith } 2126273d9f13SBarry Smith EXTERN_C_END 2127273d9f13SBarry Smith 21284a2ae208SSatish Balay #undef __FUNCT__ 21294a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2130273d9f13SBarry Smith /*@C 2131273d9f13SBarry Smith MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2132273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2133273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2134273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2135273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2136273d9f13SBarry Smith 2137273d9f13SBarry Smith Collective on Mat 2138273d9f13SBarry Smith 2139273d9f13SBarry Smith Input Parameters: 2140273d9f13SBarry Smith + A - the matrix 2141273d9f13SBarry Smith . bs - size of blockk 2142273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2143273d9f13SBarry Smith submatrix (same for all local rows) 2144273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2145273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2146273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2147273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2148273d9f13SBarry Smith submatrix (same for all local rows). 2149273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2150273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2151273d9f13SBarry Smith each block row) or PETSC_NULL. 2152273d9f13SBarry Smith 2153273d9f13SBarry Smith Output Parameter: 2154273d9f13SBarry Smith 2155273d9f13SBarry Smith 2156273d9f13SBarry Smith Options Database Keys: 2157273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2158273d9f13SBarry Smith block calculations (much slower) 2159273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2160273d9f13SBarry Smith 2161273d9f13SBarry Smith Notes: 2162273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2163273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2164273d9f13SBarry Smith 2165273d9f13SBarry Smith Storage Information: 2166273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2167273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2168273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2169273d9f13SBarry Smith local matrix (a rectangular submatrix). 2170273d9f13SBarry Smith 2171273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2172273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2173273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2174273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2175273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2176273d9f13SBarry Smith 2177273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2178273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2179273d9f13SBarry Smith 2180273d9f13SBarry Smith .vb 2181273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2182273d9f13SBarry Smith ------------------- 2183273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2184273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2185273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2186273d9f13SBarry Smith ------------------- 2187273d9f13SBarry Smith .ve 2188273d9f13SBarry Smith 2189273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2190273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2191273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2192273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2193273d9f13SBarry Smith 2194273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2195273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2196273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2197273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2198273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2199273d9f13SBarry Smith matrices. 2200273d9f13SBarry Smith 2201273d9f13SBarry Smith Level: intermediate 2202273d9f13SBarry Smith 2203273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2204273d9f13SBarry Smith 2205273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2206273d9f13SBarry Smith @*/ 2207ca01db9bSBarry Smith int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2208273d9f13SBarry Smith { 2209ca01db9bSBarry Smith int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2210273d9f13SBarry Smith 2211273d9f13SBarry Smith PetscFunctionBegin; 2212a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2213a23d5eceSKris Buschelman if (f) { 2214a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2215273d9f13SBarry Smith } 2216273d9f13SBarry Smith PetscFunctionReturn(0); 2217273d9f13SBarry Smith } 2218273d9f13SBarry Smith 22194a2ae208SSatish Balay #undef __FUNCT__ 22204a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 222179bdfe76SSatish Balay /*@C 222279bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 222379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 222479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 222579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 222679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 222779bdfe76SSatish Balay 2228db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2229db81eaa0SLois Curfman McInnes 223079bdfe76SSatish Balay Input Parameters: 2231db81eaa0SLois Curfman McInnes + comm - MPI communicator 223279bdfe76SSatish Balay . bs - size of blockk 223379bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 223492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 223592e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 223692e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 223792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 223892e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2239be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2240be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 224147a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 224279bdfe76SSatish Balay submatrix (same for all local rows) 224347a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 224492e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2245db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 224647a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 224779bdfe76SSatish Balay submatrix (same for all local rows). 224847a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 224992e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 225092e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 225179bdfe76SSatish Balay 225279bdfe76SSatish Balay Output Parameter: 225379bdfe76SSatish Balay . A - the matrix 225479bdfe76SSatish Balay 2255db81eaa0SLois Curfman McInnes Options Database Keys: 2256db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2257db81eaa0SLois Curfman McInnes block calculations (much slower) 2258db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 22593ffaccefSLois Curfman McInnes 2260b259b22eSLois Curfman McInnes Notes: 226147a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 226247a75d0bSBarry Smith 226379bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 226479bdfe76SSatish Balay (possibly both). 226579bdfe76SSatish Balay 2266be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2267be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2268be79a94dSBarry Smith 226979bdfe76SSatish Balay Storage Information: 227079bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 227179bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 227279bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 227379bdfe76SSatish Balay local matrix (a rectangular submatrix). 227479bdfe76SSatish Balay 227579bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 227679bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 227779bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 227879bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 227979bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 228079bdfe76SSatish Balay 228179bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 228279bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 228379bdfe76SSatish Balay 2284db81eaa0SLois Curfman McInnes .vb 2285db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2286db81eaa0SLois Curfman McInnes ------------------- 2287db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2288db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2289db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2290db81eaa0SLois Curfman McInnes ------------------- 2291db81eaa0SLois Curfman McInnes .ve 229279bdfe76SSatish Balay 229379bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 229479bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 229579bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 229657b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 229779bdfe76SSatish Balay 2298d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2299d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 230079bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 230192e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 230292e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 23036da5968aSLois Curfman McInnes matrices. 230479bdfe76SSatish Balay 2305027ccd11SLois Curfman McInnes Level: intermediate 2306027ccd11SLois Curfman McInnes 230792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 230879bdfe76SSatish Balay 23093eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 231079bdfe76SSatish Balay @*/ 2311ca01db9bSBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 231279bdfe76SSatish Balay { 2313273d9f13SBarry Smith int ierr,size; 231479bdfe76SSatish Balay 2315d64ed03dSBarry Smith PetscFunctionBegin; 2316273d9f13SBarry Smith ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2317d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2318273d9f13SBarry Smith if (size > 1) { 2319273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2320273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2321273d9f13SBarry Smith } else { 2322273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2323273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 23243914022bSBarry Smith } 23253a40ed3dSBarry Smith PetscFunctionReturn(0); 232679bdfe76SSatish Balay } 2327026e39d0SSatish Balay 23284a2ae208SSatish Balay #undef __FUNCT__ 23294a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 23302e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 23310ac07820SSatish Balay { 23320ac07820SSatish Balay Mat mat; 23330ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2334f1af5d2fSBarry Smith int ierr,len=0; 23350ac07820SSatish Balay 2336d64ed03dSBarry Smith PetscFunctionBegin; 23370ac07820SSatish Balay *newmat = 0; 2338273d9f13SBarry Smith ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2339273d9f13SBarry Smith ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr); 2340273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 23410ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2342273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 23430ac07820SSatish Balay a->bs = oldmat->bs; 23440ac07820SSatish Balay a->bs2 = oldmat->bs2; 23450ac07820SSatish Balay a->mbs = oldmat->mbs; 23460ac07820SSatish Balay a->nbs = oldmat->nbs; 23470ac07820SSatish Balay a->Mbs = oldmat->Mbs; 23480ac07820SSatish Balay a->Nbs = oldmat->Nbs; 23490ac07820SSatish Balay 23500ac07820SSatish Balay a->rstart = oldmat->rstart; 23510ac07820SSatish Balay a->rend = oldmat->rend; 23520ac07820SSatish Balay a->cstart = oldmat->cstart; 23530ac07820SSatish Balay a->cend = oldmat->cend; 23540ac07820SSatish Balay a->size = oldmat->size; 23550ac07820SSatish Balay a->rank = oldmat->rank; 2356aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2357aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2358aef5e8e0SSatish Balay a->rowindices = 0; 23590ac07820SSatish Balay a->rowvalues = 0; 23600ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 236130793edcSSatish Balay a->barray = 0; 23623123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 23633123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 23643123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 23653123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 23660ac07820SSatish Balay 2367133cdb44SSatish Balay /* hash table stuff */ 2368133cdb44SSatish Balay a->ht = 0; 2369133cdb44SSatish Balay a->hd = 0; 2370133cdb44SSatish Balay a->ht_size = 0; 2371133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 237225fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2373133cdb44SSatish Balay a->ht_total_ct = 0; 2374133cdb44SSatish Balay a->ht_insert_ct = 0; 2375133cdb44SSatish Balay 2376549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 23778798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 23788798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 23790ac07820SSatish Balay if (oldmat->colmap) { 2380aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 23810f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 238248e59246SSatish Balay #else 238382502324SSatish Balay ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2384b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2385549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 238648e59246SSatish Balay #endif 23870ac07820SSatish Balay } else a->colmap = 0; 23880ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 238982502324SSatish Balay ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2390b0a32e0cSBarry Smith PetscLogObjectMemory(mat,len*sizeof(int)); 2391549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 23920ac07820SSatish Balay } else a->garray = 0; 23930ac07820SSatish Balay 23940ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2395b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->lvec); 23960ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2397e18c124aSSatish Balay 2398b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->Mvctx); 23992e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2400b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->A); 24012e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2402b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->B); 2403b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 24040ac07820SSatish Balay *newmat = mat; 24053a40ed3dSBarry Smith PetscFunctionReturn(0); 24060ac07820SSatish Balay } 240757b952d6SSatish Balay 2408e090d566SSatish Balay #include "petscsys.h" 240957b952d6SSatish Balay 2410273d9f13SBarry Smith EXTERN_C_BEGIN 24114a2ae208SSatish Balay #undef __FUNCT__ 24124a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2413b0a32e0cSBarry Smith int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 241457b952d6SSatish Balay { 241557b952d6SSatish Balay Mat A; 241657b952d6SSatish Balay int i,nz,ierr,j,rstart,rend,fd; 241787828ca2SBarry Smith PetscScalar *vals,*buf; 241857b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 241957b952d6SSatish Balay MPI_Status status; 2420cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 242157b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2422f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 242357b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 242457b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 242557b952d6SSatish Balay 2426d64ed03dSBarry Smith PetscFunctionBegin; 2427b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 242857b952d6SSatish Balay 2429d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2430d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 243157b952d6SSatish Balay if (!rank) { 2432b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2433e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2434552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2435d64ed03dSBarry Smith if (header[3] < 0) { 243629bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2437d64ed03dSBarry Smith } 24386c5fab8fSBarry Smith } 2439d64ed03dSBarry Smith 2440ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 244157b952d6SSatish Balay M = header[1]; N = header[2]; 244257b952d6SSatish Balay 244329bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 244457b952d6SSatish Balay 244557b952d6SSatish Balay /* 244657b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 244757b952d6SSatish Balay divisible by the blocksize 244857b952d6SSatish Balay */ 244957b952d6SSatish Balay Mbs = M/bs; 245057b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 245157b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 245257b952d6SSatish Balay else Mbs++; 245357b952d6SSatish Balay if (extra_rows &&!rank) { 2454b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 245557b952d6SSatish Balay } 2456537820f0SBarry Smith 245757b952d6SSatish Balay /* determine ownership of all rows */ 245857b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 245957b952d6SSatish Balay m = mbs*bs; 2460b0a32e0cSBarry Smith ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2461cee3aa6bSSatish Balay browners = rowners + size + 1; 2462ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 246357b952d6SSatish Balay rowners[0] = 0; 2464cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2465cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 246657b952d6SSatish Balay rstart = rowners[rank]; 246757b952d6SSatish Balay rend = rowners[rank+1]; 246857b952d6SSatish Balay 246957b952d6SSatish Balay /* distribute row lengths to all processors */ 247082502324SSatish Balay ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 247157b952d6SSatish Balay if (!rank) { 2472b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2473e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 247457b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 247582502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2476cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2477ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2478606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2479d64ed03dSBarry Smith } else { 2480ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 248157b952d6SSatish Balay } 248257b952d6SSatish Balay 248357b952d6SSatish Balay if (!rank) { 248457b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 248582502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2486549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 248757b952d6SSatish Balay for (i=0; i<size; i++) { 248857b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 248957b952d6SSatish Balay procsnz[i] += rowlengths[j]; 249057b952d6SSatish Balay } 249157b952d6SSatish Balay } 2492606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 249357b952d6SSatish Balay 249457b952d6SSatish Balay /* determine max buffer needed and allocate it */ 249557b952d6SSatish Balay maxnz = 0; 249657b952d6SSatish Balay for (i=0; i<size; i++) { 249757b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 249857b952d6SSatish Balay } 249982502324SSatish Balay ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 250057b952d6SSatish Balay 250157b952d6SSatish Balay /* read in my part of the matrix column indices */ 250257b952d6SSatish Balay nz = procsnz[0]; 250382502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 250457b952d6SSatish Balay mycols = ibuf; 2505cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2506e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2507cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2508cee3aa6bSSatish Balay 250957b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 251057b952d6SSatish Balay for (i=1; i<size-1; i++) { 251157b952d6SSatish Balay nz = procsnz[i]; 2512e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2513ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 251457b952d6SSatish Balay } 251557b952d6SSatish Balay /* read in the stuff for the last proc */ 251657b952d6SSatish Balay if (size != 1) { 251757b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2518e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 251957b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2520ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 252157b952d6SSatish Balay } 2522606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2523d64ed03dSBarry Smith } else { 252457b952d6SSatish Balay /* determine buffer space needed for message */ 252557b952d6SSatish Balay nz = 0; 252657b952d6SSatish Balay for (i=0; i<m; i++) { 252757b952d6SSatish Balay nz += locrowlens[i]; 252857b952d6SSatish Balay } 252982502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 253057b952d6SSatish Balay mycols = ibuf; 253157b952d6SSatish Balay /* receive message of column indices*/ 2532ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2533ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 253429bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 253557b952d6SSatish Balay } 253657b952d6SSatish Balay 253757b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 253882502324SSatish Balay ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2539cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 254082502324SSatish Balay ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2541549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 254257b952d6SSatish Balay masked1 = mask + Mbs; 254357b952d6SSatish Balay masked2 = masked1 + Mbs; 254457b952d6SSatish Balay rowcount = 0; nzcount = 0; 254557b952d6SSatish Balay for (i=0; i<mbs; i++) { 254657b952d6SSatish Balay dcount = 0; 254757b952d6SSatish Balay odcount = 0; 254857b952d6SSatish Balay for (j=0; j<bs; j++) { 254957b952d6SSatish Balay kmax = locrowlens[rowcount]; 255057b952d6SSatish Balay for (k=0; k<kmax; k++) { 255157b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 255257b952d6SSatish Balay if (!mask[tmp]) { 255357b952d6SSatish Balay mask[tmp] = 1; 255457b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 255557b952d6SSatish Balay else masked1[dcount++] = tmp; 255657b952d6SSatish Balay } 255757b952d6SSatish Balay } 255857b952d6SSatish Balay rowcount++; 255957b952d6SSatish Balay } 2560cee3aa6bSSatish Balay 256157b952d6SSatish Balay dlens[i] = dcount; 256257b952d6SSatish Balay odlens[i] = odcount; 2563cee3aa6bSSatish Balay 256457b952d6SSatish Balay /* zero out the mask elements we set */ 256557b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 256657b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 256757b952d6SSatish Balay } 2568cee3aa6bSSatish Balay 256957b952d6SSatish Balay /* create our matrix */ 2570*78ae41b4SKris Buschelman ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2571*78ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 2572*78ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2573*78ae41b4SKris Buschelman 2574*78ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 25756d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 257657b952d6SSatish Balay 257757b952d6SSatish Balay if (!rank) { 257887828ca2SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 257957b952d6SSatish Balay /* read in my part of the matrix numerical values */ 258057b952d6SSatish Balay nz = procsnz[0]; 258157b952d6SSatish Balay vals = buf; 2582cee3aa6bSSatish Balay mycols = ibuf; 2583cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2584e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2585cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2586537820f0SBarry Smith 258757b952d6SSatish Balay /* insert into matrix */ 258857b952d6SSatish Balay jj = rstart*bs; 258957b952d6SSatish Balay for (i=0; i<m; i++) { 2590b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 259157b952d6SSatish Balay mycols += locrowlens[i]; 259257b952d6SSatish Balay vals += locrowlens[i]; 259357b952d6SSatish Balay jj++; 259457b952d6SSatish Balay } 259557b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 259657b952d6SSatish Balay for (i=1; i<size-1; i++) { 259757b952d6SSatish Balay nz = procsnz[i]; 259857b952d6SSatish Balay vals = buf; 2599e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2600ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 260157b952d6SSatish Balay } 260257b952d6SSatish Balay /* the last proc */ 260357b952d6SSatish Balay if (size != 1){ 260457b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2605cee3aa6bSSatish Balay vals = buf; 2606e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 260757b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2608ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 260957b952d6SSatish Balay } 2610606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2611d64ed03dSBarry Smith } else { 261257b952d6SSatish Balay /* receive numeric values */ 261387828ca2SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 261457b952d6SSatish Balay 261557b952d6SSatish Balay /* receive message of values*/ 261657b952d6SSatish Balay vals = buf; 2617cee3aa6bSSatish Balay mycols = ibuf; 2618ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2619ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 262029bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 262157b952d6SSatish Balay 262257b952d6SSatish Balay /* insert into matrix */ 262357b952d6SSatish Balay jj = rstart*bs; 2624cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2625b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 262657b952d6SSatish Balay mycols += locrowlens[i]; 262757b952d6SSatish Balay vals += locrowlens[i]; 262857b952d6SSatish Balay jj++; 262957b952d6SSatish Balay } 263057b952d6SSatish Balay } 2631606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2632606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2633606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2634606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2635606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2636606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 26376d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 26386d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2639*78ae41b4SKris Buschelman 2640*78ae41b4SKris Buschelman *newmat = A; 26413a40ed3dSBarry Smith PetscFunctionReturn(0); 264257b952d6SSatish Balay } 2643273d9f13SBarry Smith EXTERN_C_END 264457b952d6SSatish Balay 26454a2ae208SSatish Balay #undef __FUNCT__ 26464a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2647133cdb44SSatish Balay /*@ 2648133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2649133cdb44SSatish Balay 2650133cdb44SSatish Balay Input Parameters: 2651133cdb44SSatish Balay . mat - the matrix 2652133cdb44SSatish Balay . fact - factor 2653133cdb44SSatish Balay 2654fee21e36SBarry Smith Collective on Mat 2655fee21e36SBarry Smith 26568c890885SBarry Smith Level: advanced 26578c890885SBarry Smith 2658133cdb44SSatish Balay Notes: 2659133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2660133cdb44SSatish Balay 2661133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2662133cdb44SSatish Balay 2663133cdb44SSatish Balay .seealso: MatSetOption() 2664133cdb44SSatish Balay @*/ 2665329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2666133cdb44SSatish Balay { 26675bf65638SKris Buschelman int ierr,(*f)(Mat,PetscReal); 26685bf65638SKris Buschelman 26695bf65638SKris Buschelman PetscFunctionBegin; 26705bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 26715bf65638SKris Buschelman if (f) { 26725bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 26735bf65638SKris Buschelman } 26745bf65638SKris Buschelman PetscFunctionReturn(0); 26755bf65638SKris Buschelman } 26765bf65638SKris Buschelman 26775bf65638SKris Buschelman #undef __FUNCT__ 26785bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 26795bf65638SKris Buschelman int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 26805bf65638SKris Buschelman { 268125fdafccSSatish Balay Mat_MPIBAIJ *baij; 2682133cdb44SSatish Balay 2683133cdb44SSatish Balay PetscFunctionBegin; 2684133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 2685133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2686133cdb44SSatish Balay baij->ht_fact = fact; 2687133cdb44SSatish Balay PetscFunctionReturn(0); 2688133cdb44SSatish Balay } 2689f2a5309cSSatish Balay 26904a2ae208SSatish Balay #undef __FUNCT__ 26914a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2692268466fbSBarry Smith int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2693f2a5309cSSatish Balay { 2694f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2695f2a5309cSSatish Balay PetscFunctionBegin; 2696f2a5309cSSatish Balay *Ad = a->A; 2697f2a5309cSSatish Balay *Ao = a->B; 2698195d93cdSBarry Smith *colmap = a->garray; 2699f2a5309cSSatish Balay PetscFunctionReturn(0); 2700f2a5309cSSatish Balay } 2701