1*87828ca2SBarry Smith /*$Id: mpibaij.c,v 1.226 2001/07/20 21:20:39 bsmith Exp bsmith $*/ 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 579bdfe76SSatish Balay 6ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat); 8ca44d042SBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9ca44d042SBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10*87828ca2SBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *); 11*87828ca2SBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 12*87828ca2SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 13*87828ca2SBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**); 14*87828ca2SBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**); 15ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16*87828ca2SBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*); 1793fea6afSBarry Smith 1893fea6afSBarry Smith /* UGLY, ugly, ugly 19*87828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 2093fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2193fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2293fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2393fea6afSBarry Smith into the single precision data structures. 2493fea6afSBarry Smith */ 2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 26ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 3193fea6afSBarry Smith #else 326fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 356fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 366fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3793fea6afSBarry Smith #endif 383b2fbd54SBarry Smith 394a2ae208SSatish Balay #undef __FUNCT__ 404a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 417843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 427843d17aSBarry Smith { 437843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 447843d17aSBarry Smith int ierr,i; 45*87828ca2SBarry Smith PetscScalar *va,*vb; 467843d17aSBarry Smith Vec vtmp; 477843d17aSBarry Smith 487843d17aSBarry Smith PetscFunctionBegin; 497843d17aSBarry Smith 507843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51273d9f13SBarry Smith ierr = VecGetArray(v,&va);CHKERRQ(ierr); 527843d17aSBarry Smith 53ac355199SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 547843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55273d9f13SBarry Smith ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 567843d17aSBarry Smith 57273d9f13SBarry Smith for (i=0; i<A->m; i++){ 58273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 597843d17aSBarry Smith } 607843d17aSBarry Smith 617843d17aSBarry Smith ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 627843d17aSBarry Smith ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 647843d17aSBarry Smith 657843d17aSBarry Smith PetscFunctionReturn(0); 667843d17aSBarry Smith } 677843d17aSBarry Smith 687fc3c18eSBarry Smith EXTERN_C_BEGIN 694a2ae208SSatish Balay #undef __FUNCT__ 704a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 717fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat) 727fc3c18eSBarry Smith { 737fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 747fc3c18eSBarry Smith int ierr; 757fc3c18eSBarry Smith 767fc3c18eSBarry Smith PetscFunctionBegin; 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 787fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 797fc3c18eSBarry Smith PetscFunctionReturn(0); 807fc3c18eSBarry Smith } 817fc3c18eSBarry Smith EXTERN_C_END 827fc3c18eSBarry Smith 837fc3c18eSBarry Smith EXTERN_C_BEGIN 844a2ae208SSatish Balay #undef __FUNCT__ 854a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 867fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat) 877fc3c18eSBarry Smith { 887fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 897fc3c18eSBarry Smith int ierr; 907fc3c18eSBarry Smith 917fc3c18eSBarry Smith PetscFunctionBegin; 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 937fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 947fc3c18eSBarry Smith PetscFunctionReturn(0); 957fc3c18eSBarry Smith } 967fc3c18eSBarry Smith EXTERN_C_END 977fc3c18eSBarry Smith 98537820f0SBarry Smith /* 99537820f0SBarry Smith Local utility routine that creates a mapping from the global column 10057b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10157b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10257b952d6SSatish Balay length of colmap equals the global matrix length. 10357b952d6SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 10657b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 10757b952d6SSatish Balay { 10857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10957b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110dc2900e9SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 11157b952d6SSatish Balay 112d64ed03dSBarry Smith PetscFunctionBegin; 113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 114f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11548e59246SSatish Balay for (i=0; i<nbs; i++){ 1160f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11748e59246SSatish Balay } 11848e59246SSatish Balay #else 11982502324SSatish Balay ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 120b0a32e0cSBarry Smith PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12348e59246SSatish Balay #endif 1243a40ed3dSBarry Smith PetscFunctionReturn(0); 12557b952d6SSatish Balay } 12657b952d6SSatish Balay 12780c1aa95SSatish Balay #define CHUNKSIZE 10 12880c1aa95SSatish Balay 129f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13080c1aa95SSatish Balay { \ 13180c1aa95SSatish Balay \ 13280c1aa95SSatish Balay brow = row/bs; \ 13380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13580c1aa95SSatish Balay bcol = col/bs; \ 13680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 137ab26458aSBarry Smith low = 0; high = nrow; \ 138ab26458aSBarry Smith while (high-low > 3) { \ 139ab26458aSBarry Smith t = (low+high)/2; \ 140ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 141ab26458aSBarry Smith else low = t; \ 142ab26458aSBarry Smith } \ 143ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 147eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 148eada6651SSatish Balay else *bap = value; \ 149ac7a638eSSatish Balay goto a_noinsert; \ 15080c1aa95SSatish Balay } \ 15180c1aa95SSatish Balay } \ 15289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 15329bbc08cSBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 15480c1aa95SSatish Balay if (nrow >= rmax) { \ 15580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1573eda8832SBarry Smith MatScalar *new_a; \ 15880c1aa95SSatish Balay \ 15929bbc08cSBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 16089280ab3SLois Curfman McInnes \ 16180c1aa95SSatish Balay /* malloc new storage space */ \ 1623eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 16382502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 16480c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 16580c1aa95SSatish Balay new_i = new_j + new_nz; \ 16680c1aa95SSatish Balay \ 16780c1aa95SSatish Balay /* copy over old data into new slots */ \ 16880c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 16980c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 17180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1723eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1733eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174*87828ca2SBarry Smith ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1763eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 17780c1aa95SSatish Balay /* free up old matrix storage */ \ 178606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179606d414cSSatish Balay if (!a->singlemalloc) { \ 180606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182606d414cSSatish Balay } \ 18380c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1847c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 18580c1aa95SSatish Balay \ 18680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 18980c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 19080c1aa95SSatish Balay a->reallocs++; \ 19180c1aa95SSatish Balay a->nz++; \ 19280c1aa95SSatish Balay } \ 19380c1aa95SSatish Balay N = nrow++ - 1; \ 19480c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 19580c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 19680c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1973eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 19880c1aa95SSatish Balay } \ 1993eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 20080c1aa95SSatish Balay rp[_i] = bcol; \ 20180c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 202ac7a638eSSatish Balay a_noinsert:; \ 20380c1aa95SSatish Balay ailen[brow] = nrow; \ 20480c1aa95SSatish Balay } 20557b952d6SSatish Balay 206ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207ac7a638eSSatish Balay { \ 208ac7a638eSSatish Balay brow = row/bs; \ 209ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 211ac7a638eSSatish Balay bcol = col/bs; \ 212ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 213ac7a638eSSatish Balay low = 0; high = nrow; \ 214ac7a638eSSatish Balay while (high-low > 3) { \ 215ac7a638eSSatish Balay t = (low+high)/2; \ 216ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 217ac7a638eSSatish Balay else low = t; \ 218ac7a638eSSatish Balay } \ 219ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 220ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 221ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 222ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 223ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 224ac7a638eSSatish Balay else *bap = value; \ 225ac7a638eSSatish Balay goto b_noinsert; \ 226ac7a638eSSatish Balay } \ 227ac7a638eSSatish Balay } \ 22889280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 22929bbc08cSBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 230ac7a638eSSatish Balay if (nrow >= rmax) { \ 231ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 23274c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2333eda8832SBarry Smith MatScalar *new_a; \ 234ac7a638eSSatish Balay \ 23529bbc08cSBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 23689280ab3SLois Curfman McInnes \ 237ac7a638eSSatish Balay /* malloc new storage space */ \ 2383eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 23982502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 241ac7a638eSSatish Balay new_i = new_j + new_nz; \ 242ac7a638eSSatish Balay \ 243ac7a638eSSatish Balay /* copy over old data into new slots */ \ 244ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 24574c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2483eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2493eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2503eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2523eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253ac7a638eSSatish Balay /* free up old matrix storage */ \ 254606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255606d414cSSatish Balay if (!b->singlemalloc) { \ 256606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258606d414cSSatish Balay } \ 25974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2607c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 261ac7a638eSSatish Balay \ 262ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264b0a32e0cSBarry Smith PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 26574c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 26674c639caSSatish Balay b->reallocs++; \ 26774c639caSSatish Balay b->nz++; \ 268ac7a638eSSatish Balay } \ 269ac7a638eSSatish Balay N = nrow++ - 1; \ 270ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 271ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 272ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2733eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274ac7a638eSSatish Balay } \ 2753eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276ac7a638eSSatish Balay rp[_i] = bcol; \ 277ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 278ac7a638eSSatish Balay b_noinsert:; \ 279ac7a638eSSatish Balay bilen[brow] = nrow; \ 280ac7a638eSSatish Balay } 281ac7a638eSSatish Balay 28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2834a2ae208SSatish Balay #undef __FUNCT__ 2844a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 285*87828ca2SBarry Smith int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 28657b952d6SSatish Balay { 2876fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 28893fea6afSBarry Smith int ierr,i,N = m*n; 2896fa18ffdSBarry Smith MatScalar *vsingle; 29093fea6afSBarry Smith 29193fea6afSBarry Smith PetscFunctionBegin; 2926fa18ffdSBarry Smith if (N > b->setvalueslen) { 2936fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 29482502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2956fa18ffdSBarry Smith b->setvalueslen = N; 2966fa18ffdSBarry Smith } 2976fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2986fa18ffdSBarry Smith 29993fea6afSBarry Smith for (i=0; i<N; i++) { 30093fea6afSBarry Smith vsingle[i] = v[i]; 30193fea6afSBarry Smith } 30293fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 30393fea6afSBarry Smith PetscFunctionReturn(0); 30493fea6afSBarry Smith } 30593fea6afSBarry Smith 3064a2ae208SSatish Balay #undef __FUNCT__ 3074a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 308*87828ca2SBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 30993fea6afSBarry Smith { 3106fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3116fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 3126fa18ffdSBarry Smith MatScalar *vsingle; 31393fea6afSBarry Smith 31493fea6afSBarry Smith PetscFunctionBegin; 3156fa18ffdSBarry Smith if (N > b->setvalueslen) { 3166fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 31782502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3186fa18ffdSBarry Smith b->setvalueslen = N; 3196fa18ffdSBarry Smith } 3206fa18ffdSBarry Smith vsingle = b->setvaluescopy; 32193fea6afSBarry Smith for (i=0; i<N; i++) { 32293fea6afSBarry Smith vsingle[i] = v[i]; 32393fea6afSBarry Smith } 32493fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 32593fea6afSBarry Smith PetscFunctionReturn(0); 32693fea6afSBarry Smith } 3276fa18ffdSBarry Smith 3284a2ae208SSatish Balay #undef __FUNCT__ 3294a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 330*87828ca2SBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 3316fa18ffdSBarry Smith { 3326fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3336fa18ffdSBarry Smith int ierr,i,N = m*n; 3346fa18ffdSBarry Smith MatScalar *vsingle; 3356fa18ffdSBarry Smith 3366fa18ffdSBarry Smith PetscFunctionBegin; 3376fa18ffdSBarry Smith if (N > b->setvalueslen) { 3386fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 33982502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3406fa18ffdSBarry Smith b->setvalueslen = N; 3416fa18ffdSBarry Smith } 3426fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3436fa18ffdSBarry Smith for (i=0; i<N; i++) { 3446fa18ffdSBarry Smith vsingle[i] = v[i]; 3456fa18ffdSBarry Smith } 3466fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3476fa18ffdSBarry Smith PetscFunctionReturn(0); 3486fa18ffdSBarry Smith } 3496fa18ffdSBarry Smith 3504a2ae208SSatish Balay #undef __FUNCT__ 3514a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 352*87828ca2SBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 3536fa18ffdSBarry Smith { 3546fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3556fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 3566fa18ffdSBarry Smith MatScalar *vsingle; 3576fa18ffdSBarry Smith 3586fa18ffdSBarry Smith PetscFunctionBegin; 3596fa18ffdSBarry Smith if (N > b->setvalueslen) { 3606fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 36182502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3626fa18ffdSBarry Smith b->setvalueslen = N; 3636fa18ffdSBarry Smith } 3646fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3656fa18ffdSBarry Smith for (i=0; i<N; i++) { 3666fa18ffdSBarry Smith vsingle[i] = v[i]; 3676fa18ffdSBarry Smith } 3686fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3696fa18ffdSBarry Smith PetscFunctionReturn(0); 3706fa18ffdSBarry Smith } 37193fea6afSBarry Smith #endif 37293fea6afSBarry Smith 3734a2ae208SSatish Balay #undef __FUNCT__ 3744a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 37593fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 37693fea6afSBarry Smith { 37757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 37893fea6afSBarry Smith MatScalar value; 379273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 3804fa0d573SSatish Balay int ierr,i,j,row,col; 381273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 3824fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 3834fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 38457b952d6SSatish Balay 385eada6651SSatish Balay /* Some Variables required in the macro */ 38680c1aa95SSatish Balay Mat A = baij->A; 38780c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 388ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3893eda8832SBarry Smith MatScalar *aa=a->a; 390ac7a638eSSatish Balay 391ac7a638eSSatish Balay Mat B = baij->B; 392ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 393ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3943eda8832SBarry Smith MatScalar *ba=b->a; 395ac7a638eSSatish Balay 396ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 397ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 3983eda8832SBarry Smith MatScalar *ap,*bap; 39980c1aa95SSatish Balay 400d64ed03dSBarry Smith PetscFunctionBegin; 40157b952d6SSatish Balay for (i=0; i<m; i++) { 4025ef9f2a5SBarry Smith if (im[i] < 0) continue; 403aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 404273d9f13SBarry Smith if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 405639f9d9dSBarry Smith #endif 40657b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 40757b952d6SSatish Balay row = im[i] - rstart_orig; 40857b952d6SSatish Balay for (j=0; j<n; j++) { 40957b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 41057b952d6SSatish Balay col = in[j] - cstart_orig; 41157b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 41380c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 41473959e64SBarry Smith } else if (in[j] < 0) continue; 415aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 416273d9f13SBarry Smith else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 417639f9d9dSBarry Smith #endif 41857b952d6SSatish Balay else { 41957b952d6SSatish Balay if (mat->was_assembled) { 420905e6a2fSBarry Smith if (!baij->colmap) { 421905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 422905e6a2fSBarry Smith } 423aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4240f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 425fa46199cSSatish Balay col = col - 1 + in[j]%bs; 42648e59246SSatish Balay #else 427905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 42848e59246SSatish Balay #endif 42957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 43057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 43157b952d6SSatish Balay col = in[j]; 4329bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 4339bf004c3SSatish Balay B = baij->B; 4349bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 4359bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 4369bf004c3SSatish Balay ba=b->a; 43757b952d6SSatish Balay } 438d64ed03dSBarry Smith } else col = in[j]; 43957b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 440ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 441ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 44257b952d6SSatish Balay } 44357b952d6SSatish Balay } 444d64ed03dSBarry Smith } else { 44590f02eecSBarry Smith if (!baij->donotstash) { 446ff2fd236SBarry Smith if (roworiented) { 4476fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 448ff2fd236SBarry Smith } else { 4496fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 45057b952d6SSatish Balay } 45157b952d6SSatish Balay } 45257b952d6SSatish Balay } 45390f02eecSBarry Smith } 4543a40ed3dSBarry Smith PetscFunctionReturn(0); 45557b952d6SSatish Balay } 45657b952d6SSatish Balay 4574a2ae208SSatish Balay #undef __FUNCT__ 4584a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 45993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 460ab26458aSBarry Smith { 461ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 46293fea6afSBarry Smith MatScalar *value,*barray=baij->barray; 463273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 464273d9f13SBarry Smith int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 465ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 466ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 467ab26458aSBarry Smith 468b16ae2b1SBarry Smith PetscFunctionBegin; 46930793edcSSatish Balay if(!barray) { 47082502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 47182502324SSatish Balay baij->barray = barray; 47230793edcSSatish Balay } 47330793edcSSatish Balay 474ab26458aSBarry Smith if (roworiented) { 475ab26458aSBarry Smith stepval = (n-1)*bs; 476ab26458aSBarry Smith } else { 477ab26458aSBarry Smith stepval = (m-1)*bs; 478ab26458aSBarry Smith } 479ab26458aSBarry Smith for (i=0; i<m; i++) { 4805ef9f2a5SBarry Smith if (im[i] < 0) continue; 481aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 48229bbc08cSBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 483ab26458aSBarry Smith #endif 484ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 485ab26458aSBarry Smith row = im[i] - rstart; 486ab26458aSBarry Smith for (j=0; j<n; j++) { 48715b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 48815b57d14SSatish Balay if ((roworiented) && (n == 1)) { 48915b57d14SSatish Balay barray = v + i*bs2; 49015b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 49115b57d14SSatish Balay barray = v + j*bs2; 49215b57d14SSatish Balay } else { /* Here a copy is required */ 493ab26458aSBarry Smith if (roworiented) { 494ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 495ab26458aSBarry Smith } else { 496ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 497abef11f7SSatish Balay } 49847513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 49947513183SBarry Smith for (jj=0; jj<bs; jj++) { 50030793edcSSatish Balay *barray++ = *value++; 50147513183SBarry Smith } 50247513183SBarry Smith } 50330793edcSSatish Balay barray -=bs2; 50415b57d14SSatish Balay } 505abef11f7SSatish Balay 506abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 507abef11f7SSatish Balay col = in[j] - cstart; 50893fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509ab26458aSBarry Smith } 5105ef9f2a5SBarry Smith else if (in[j] < 0) continue; 511aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 51229bbc08cSBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 513ab26458aSBarry Smith #endif 514ab26458aSBarry Smith else { 515ab26458aSBarry Smith if (mat->was_assembled) { 516ab26458aSBarry Smith if (!baij->colmap) { 517ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 518ab26458aSBarry Smith } 519a5eb4965SSatish Balay 520aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 521aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 522fa46199cSSatish Balay { int data; 5230f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 52429bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 525fa46199cSSatish Balay } 52648e59246SSatish Balay #else 52729bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 528a5eb4965SSatish Balay #endif 52948e59246SSatish Balay #endif 530aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 5310f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532fa46199cSSatish Balay col = (col - 1)/bs; 53348e59246SSatish Balay #else 534a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 53548e59246SSatish Balay #endif 536ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 538ab26458aSBarry Smith col = in[j]; 539ab26458aSBarry Smith } 540ab26458aSBarry Smith } 541ab26458aSBarry Smith else col = in[j]; 54293fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543ab26458aSBarry Smith } 544ab26458aSBarry Smith } 545d64ed03dSBarry Smith } else { 546ab26458aSBarry Smith if (!baij->donotstash) { 547ff2fd236SBarry Smith if (roworiented) { 5486fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549ff2fd236SBarry Smith } else { 5506fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551ff2fd236SBarry Smith } 552abef11f7SSatish Balay } 553ab26458aSBarry Smith } 554ab26458aSBarry Smith } 5553a40ed3dSBarry Smith PetscFunctionReturn(0); 556ab26458aSBarry Smith } 5576fa18ffdSBarry Smith 5580bdbc534SSatish Balay #define HASH_KEY 0.6180339887 559c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 5606fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 5624a2ae208SSatish Balay #undef __FUNCT__ 5634a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 5646fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 5650bdbc534SSatish Balay { 5660bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 5680bdbc534SSatish Balay int ierr,i,j,row,col; 569273d9f13SBarry Smith int rstart_orig=baij->rstart_bs; 570c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 571c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 572329f5518SBarry Smith PetscReal tmp; 5733eda8832SBarry Smith MatScalar **HD = baij->hd,value; 574aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5754a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5764a15367fSSatish Balay #endif 5770bdbc534SSatish Balay 5780bdbc534SSatish Balay PetscFunctionBegin; 5790bdbc534SSatish Balay 5800bdbc534SSatish Balay for (i=0; i<m; i++) { 581aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 58229bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 583273d9f13SBarry Smith if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 5840bdbc534SSatish Balay #endif 5850bdbc534SSatish Balay row = im[i]; 586c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5870bdbc534SSatish Balay for (j=0; j<n; j++) { 5880bdbc534SSatish Balay col = in[j]; 5896fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5900bdbc534SSatish Balay /* Look up into the Hash Table */ 591c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 592c2760754SSatish Balay h1 = HASH(size,key,tmp); 5930bdbc534SSatish Balay 594c2760754SSatish Balay 595c2760754SSatish Balay idx = h1; 596aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 597187ce0cbSSatish Balay insert_ct++; 598187ce0cbSSatish Balay total_ct++; 599187ce0cbSSatish Balay if (HT[idx] != key) { 600187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 601187ce0cbSSatish Balay if (idx == size) { 602187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 603187ce0cbSSatish Balay if (idx == h1) { 60429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 605187ce0cbSSatish Balay } 606187ce0cbSSatish Balay } 607187ce0cbSSatish Balay } 608187ce0cbSSatish Balay #else 609c2760754SSatish Balay if (HT[idx] != key) { 610c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 611c2760754SSatish Balay if (idx == size) { 612c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 613c2760754SSatish Balay if (idx == h1) { 61429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 615c2760754SSatish Balay } 616c2760754SSatish Balay } 617c2760754SSatish Balay } 618187ce0cbSSatish Balay #endif 619c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 620c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 621c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 6220bdbc534SSatish Balay } 6230bdbc534SSatish Balay } else { 6240bdbc534SSatish Balay if (!baij->donotstash) { 625ff2fd236SBarry Smith if (roworiented) { 6268798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 627ff2fd236SBarry Smith } else { 6288798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 6290bdbc534SSatish Balay } 6300bdbc534SSatish Balay } 6310bdbc534SSatish Balay } 6320bdbc534SSatish Balay } 633aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 634187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 635187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 636187ce0cbSSatish Balay #endif 6370bdbc534SSatish Balay PetscFunctionReturn(0); 6380bdbc534SSatish Balay } 6390bdbc534SSatish Balay 6404a2ae208SSatish Balay #undef __FUNCT__ 6414a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 6426fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 6430bdbc534SSatish Balay { 6440bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 645273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 6468798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 647273d9f13SBarry Smith int rstart=baij->rstart ; 648b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 649c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 650329f5518SBarry Smith PetscReal tmp; 6513eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 6526fa18ffdSBarry Smith MatScalar *v_t,*value; 653aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 6544a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 6554a15367fSSatish Balay #endif 6560bdbc534SSatish Balay 657d0a41580SSatish Balay PetscFunctionBegin; 658d0a41580SSatish Balay 6590bdbc534SSatish Balay if (roworiented) { 6600bdbc534SSatish Balay stepval = (n-1)*bs; 6610bdbc534SSatish Balay } else { 6620bdbc534SSatish Balay stepval = (m-1)*bs; 6630bdbc534SSatish Balay } 6640bdbc534SSatish Balay for (i=0; i<m; i++) { 665aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 66629bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 66729bbc08cSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 6680bdbc534SSatish Balay #endif 6690bdbc534SSatish Balay row = im[i]; 670187ce0cbSSatish Balay v_t = v + i*bs2; 671c2760754SSatish Balay if (row >= rstart && row < rend) { 6720bdbc534SSatish Balay for (j=0; j<n; j++) { 6730bdbc534SSatish Balay col = in[j]; 6740bdbc534SSatish Balay 6750bdbc534SSatish Balay /* Look up into the Hash Table */ 676c2760754SSatish Balay key = row*Nbs+col+1; 677c2760754SSatish Balay h1 = HASH(size,key,tmp); 6780bdbc534SSatish Balay 679c2760754SSatish Balay idx = h1; 680aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 681187ce0cbSSatish Balay total_ct++; 682187ce0cbSSatish Balay insert_ct++; 683187ce0cbSSatish Balay if (HT[idx] != key) { 684187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 685187ce0cbSSatish Balay if (idx == size) { 686187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 687187ce0cbSSatish Balay if (idx == h1) { 68829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 689187ce0cbSSatish Balay } 690187ce0cbSSatish Balay } 691187ce0cbSSatish Balay } 692187ce0cbSSatish Balay #else 693c2760754SSatish Balay if (HT[idx] != key) { 694c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 695c2760754SSatish Balay if (idx == size) { 696c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 697c2760754SSatish Balay if (idx == h1) { 69829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 699c2760754SSatish Balay } 700c2760754SSatish Balay } 701c2760754SSatish Balay } 702187ce0cbSSatish Balay #endif 703c2760754SSatish Balay baij_a = HD[idx]; 7040bdbc534SSatish Balay if (roworiented) { 705c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 706187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 707187ce0cbSSatish Balay value = v_t; 708187ce0cbSSatish Balay v_t += bs; 709fef45726SSatish Balay if (addv == ADD_VALUES) { 710c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 711c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 712fef45726SSatish Balay baij_a[jj] += *value++; 713b4cc0f5aSSatish Balay } 714b4cc0f5aSSatish Balay } 715fef45726SSatish Balay } else { 716c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 717c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 718fef45726SSatish Balay baij_a[jj] = *value++; 719fef45726SSatish Balay } 720fef45726SSatish Balay } 721fef45726SSatish Balay } 7220bdbc534SSatish Balay } else { 7230bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 724fef45726SSatish Balay if (addv == ADD_VALUES) { 725b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 7260bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 727fef45726SSatish Balay baij_a[jj] += *value++; 728fef45726SSatish Balay } 729fef45726SSatish Balay } 730fef45726SSatish Balay } else { 731fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 732fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 733fef45726SSatish Balay baij_a[jj] = *value++; 734fef45726SSatish Balay } 735b4cc0f5aSSatish Balay } 7360bdbc534SSatish Balay } 7370bdbc534SSatish Balay } 7380bdbc534SSatish Balay } 7390bdbc534SSatish Balay } else { 7400bdbc534SSatish Balay if (!baij->donotstash) { 7410bdbc534SSatish Balay if (roworiented) { 7428798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7430bdbc534SSatish Balay } else { 7448798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7450bdbc534SSatish Balay } 7460bdbc534SSatish Balay } 7470bdbc534SSatish Balay } 7480bdbc534SSatish Balay } 749aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 750187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 751187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 752187ce0cbSSatish Balay #endif 7530bdbc534SSatish Balay PetscFunctionReturn(0); 7540bdbc534SSatish Balay } 755133cdb44SSatish Balay 7564a2ae208SSatish Balay #undef __FUNCT__ 7574a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 758*87828ca2SBarry Smith int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 759d6de1c52SSatish Balay { 760d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 761d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 76248e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 763d6de1c52SSatish Balay 764133cdb44SSatish Balay PetscFunctionBegin; 765d6de1c52SSatish Balay for (i=0; i<m; i++) { 76629bbc08cSBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 767273d9f13SBarry Smith if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 768d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 769d6de1c52SSatish Balay row = idxm[i] - bsrstart; 770d6de1c52SSatish Balay for (j=0; j<n; j++) { 77129bbc08cSBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 772273d9f13SBarry Smith if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 773d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 774d6de1c52SSatish Balay col = idxn[j] - bscstart; 77598dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776d64ed03dSBarry Smith } else { 777905e6a2fSBarry Smith if (!baij->colmap) { 778905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 779905e6a2fSBarry Smith } 780aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7810f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 782fa46199cSSatish Balay data --; 78348e59246SSatish Balay #else 78448e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 78548e59246SSatish Balay #endif 78648e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 787d9d09a02SSatish Balay else { 78848e59246SSatish Balay col = data + idxn[j]%bs; 78998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 790d6de1c52SSatish Balay } 791d6de1c52SSatish Balay } 792d6de1c52SSatish Balay } 793d64ed03dSBarry Smith } else { 79429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 795d6de1c52SSatish Balay } 796d6de1c52SSatish Balay } 7973a40ed3dSBarry Smith PetscFunctionReturn(0); 798d6de1c52SSatish Balay } 799d6de1c52SSatish Balay 8004a2ae208SSatish Balay #undef __FUNCT__ 8014a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 802329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 803d6de1c52SSatish Balay { 804d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 805d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 806acdf5bf4SSatish Balay int ierr,i,bs2=baij->bs2; 807329f5518SBarry Smith PetscReal sum = 0.0; 8083eda8832SBarry Smith MatScalar *v; 809d6de1c52SSatish Balay 810d64ed03dSBarry Smith PetscFunctionBegin; 811d6de1c52SSatish Balay if (baij->size == 1) { 812d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 813d6de1c52SSatish Balay } else { 814d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 815d6de1c52SSatish Balay v = amat->a; 816d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 817aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 818329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 819d6de1c52SSatish Balay #else 820d6de1c52SSatish Balay sum += (*v)*(*v); v++; 821d6de1c52SSatish Balay #endif 822d6de1c52SSatish Balay } 823d6de1c52SSatish Balay v = bmat->a; 824d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 825aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 826329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 827d6de1c52SSatish Balay #else 828d6de1c52SSatish Balay sum += (*v)*(*v); v++; 829d6de1c52SSatish Balay #endif 830d6de1c52SSatish Balay } 831ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 832d6de1c52SSatish Balay *norm = sqrt(*norm); 833d64ed03dSBarry Smith } else { 83429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 835d6de1c52SSatish Balay } 836d64ed03dSBarry Smith } 8373a40ed3dSBarry Smith PetscFunctionReturn(0); 838d6de1c52SSatish Balay } 83957b952d6SSatish Balay 840bd7f49f5SSatish Balay 841fef45726SSatish Balay /* 842fef45726SSatish Balay Creates the hash table, and sets the table 843fef45726SSatish Balay This table is created only once. 844fef45726SSatish Balay If new entried need to be added to the matrix 845fef45726SSatish Balay then the hash table has to be destroyed and 846fef45726SSatish Balay recreated. 847fef45726SSatish Balay */ 8484a2ae208SSatish Balay #undef __FUNCT__ 8494a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851596b8d2eSBarry Smith { 852596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 854596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 8550bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856549d3d68SSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 857187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858fef45726SSatish Balay int *HT,key; 8593eda8832SBarry Smith MatScalar **HD; 860329f5518SBarry Smith PetscReal tmp; 861aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 8624a15367fSSatish Balay int ct=0,max=0; 8634a15367fSSatish Balay #endif 864fef45726SSatish Balay 865d64ed03dSBarry Smith PetscFunctionBegin; 8660bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8670bdbc534SSatish Balay size = baij->ht_size; 868fef45726SSatish Balay 8690bdbc534SSatish Balay if (baij->ht) { 8700bdbc534SSatish Balay PetscFunctionReturn(0); 871596b8d2eSBarry Smith } 8720bdbc534SSatish Balay 873fef45726SSatish Balay /* Allocate Memory for Hash Table */ 874b0a32e0cSBarry Smith ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 876b9e4cc15SSatish Balay HD = baij->hd; 877a07cd24cSSatish Balay HT = baij->ht; 878b9e4cc15SSatish Balay 879b9e4cc15SSatish Balay 880*87828ca2SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8810bdbc534SSatish Balay 882596b8d2eSBarry Smith 883596b8d2eSBarry Smith /* Loop Over A */ 8840bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 885596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8860bdbc534SSatish Balay row = i+rstart; 8870bdbc534SSatish Balay col = aj[j]+cstart; 888596b8d2eSBarry Smith 889187ce0cbSSatish Balay key = row*Nbs + col + 1; 890c2760754SSatish Balay h1 = HASH(size,key,tmp); 8910bdbc534SSatish Balay for (k=0; k<size; k++){ 8920bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8930bdbc534SSatish Balay HT[(h1+k)%size] = key; 8940bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 895596b8d2eSBarry Smith break; 896aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 897187ce0cbSSatish Balay } else { 898187ce0cbSSatish Balay ct++; 899187ce0cbSSatish Balay #endif 900596b8d2eSBarry Smith } 901187ce0cbSSatish Balay } 902aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 903187ce0cbSSatish Balay if (k> max) max = k; 904187ce0cbSSatish Balay #endif 905596b8d2eSBarry Smith } 906596b8d2eSBarry Smith } 907596b8d2eSBarry Smith /* Loop Over B */ 9080bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 909596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9100bdbc534SSatish Balay row = i+rstart; 9110bdbc534SSatish Balay col = garray[bj[j]]; 912187ce0cbSSatish Balay key = row*Nbs + col + 1; 913c2760754SSatish Balay h1 = HASH(size,key,tmp); 9140bdbc534SSatish Balay for (k=0; k<size; k++){ 9150bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 9160bdbc534SSatish Balay HT[(h1+k)%size] = key; 9170bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 918596b8d2eSBarry Smith break; 919aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 920187ce0cbSSatish Balay } else { 921187ce0cbSSatish Balay ct++; 922187ce0cbSSatish Balay #endif 923596b8d2eSBarry Smith } 924187ce0cbSSatish Balay } 925aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 926187ce0cbSSatish Balay if (k> max) max = k; 927187ce0cbSSatish Balay #endif 928596b8d2eSBarry Smith } 929596b8d2eSBarry Smith } 930596b8d2eSBarry Smith 931596b8d2eSBarry Smith /* Print Summary */ 932aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 933c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 934596b8d2eSBarry Smith if (HT[i]) {j++;} 935c38d4ed2SBarry Smith } 936b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 937187ce0cbSSatish Balay #endif 9383a40ed3dSBarry Smith PetscFunctionReturn(0); 939596b8d2eSBarry Smith } 94057b952d6SSatish Balay 9414a2ae208SSatish Balay #undef __FUNCT__ 9424a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 943bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944bbb85fb3SSatish Balay { 945bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946ff2fd236SBarry Smith int ierr,nstash,reallocs; 947bbb85fb3SSatish Balay InsertMode addv; 948bbb85fb3SSatish Balay 949bbb85fb3SSatish Balay PetscFunctionBegin; 950bbb85fb3SSatish Balay if (baij->donotstash) { 951bbb85fb3SSatish Balay PetscFunctionReturn(0); 952bbb85fb3SSatish Balay } 953bbb85fb3SSatish Balay 954bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 955bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 956bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 95729bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 958bbb85fb3SSatish Balay } 959bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 960bbb85fb3SSatish Balay 9618798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9628798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9638798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 964b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 96546680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 966b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 967bbb85fb3SSatish Balay PetscFunctionReturn(0); 968bbb85fb3SSatish Balay } 969bbb85fb3SSatish Balay 9704a2ae208SSatish Balay #undef __FUNCT__ 9714a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 972bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 973bbb85fb3SSatish Balay { 974bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 975a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 976a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 9777c922b88SBarry Smith int *row,*col,other_disassembled; 9787c922b88SBarry Smith PetscTruth r1,r2,r3; 9793eda8832SBarry Smith MatScalar *val; 980bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 981bbb85fb3SSatish Balay 982bbb85fb3SSatish Balay PetscFunctionBegin; 983bbb85fb3SSatish Balay if (!baij->donotstash) { 984a2d1c673SSatish Balay while (1) { 9858798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986a2d1c673SSatish Balay if (!flg) break; 987a2d1c673SSatish Balay 988bbb85fb3SSatish Balay for (i=0; i<n;) { 989bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 990bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 991bbb85fb3SSatish Balay if (j < n) ncols = j-i; 992bbb85fb3SSatish Balay else ncols = n-i; 993bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 99493fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 995bbb85fb3SSatish Balay i = j; 996bbb85fb3SSatish Balay } 997bbb85fb3SSatish Balay } 9988798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 999a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1000a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1001a2d1c673SSatish Balay restore the original flags */ 1002a2d1c673SSatish Balay r1 = baij->roworiented; 1003a2d1c673SSatish Balay r2 = a->roworiented; 1004a2d1c673SSatish Balay r3 = b->roworiented; 10057c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10067c922b88SBarry Smith a->roworiented = PETSC_FALSE; 10077c922b88SBarry Smith b->roworiented = PETSC_FALSE; 1008a2d1c673SSatish Balay while (1) { 10098798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1010a2d1c673SSatish Balay if (!flg) break; 1011a2d1c673SSatish Balay 1012a2d1c673SSatish Balay for (i=0; i<n;) { 1013a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1014a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1015a2d1c673SSatish Balay if (j < n) ncols = j-i; 1016a2d1c673SSatish Balay else ncols = n-i; 101793fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1018a2d1c673SSatish Balay i = j; 1019a2d1c673SSatish Balay } 1020a2d1c673SSatish Balay } 10218798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1022a2d1c673SSatish Balay baij->roworiented = r1; 1023a2d1c673SSatish Balay a->roworiented = r2; 1024a2d1c673SSatish Balay b->roworiented = r3; 1025bbb85fb3SSatish Balay } 1026bbb85fb3SSatish Balay 1027bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1028bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1029bbb85fb3SSatish Balay 1030bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1031bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1032bbb85fb3SSatish Balay /* 1033bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1034bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1035bbb85fb3SSatish Balay */ 1036bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1037bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1038bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1039bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1040bbb85fb3SSatish Balay } 1041bbb85fb3SSatish Balay } 1042bbb85fb3SSatish Balay 1043bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1044bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1045bbb85fb3SSatish Balay } 1046bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1047bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1048bbb85fb3SSatish Balay 1049aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1050bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1051b0a32e0cSBarry Smith PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1052bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1053bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1054bbb85fb3SSatish Balay } 1055bbb85fb3SSatish Balay #endif 1056bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1057bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1058bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1059bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1060bbb85fb3SSatish Balay } 1061bbb85fb3SSatish Balay 1062606d414cSSatish Balay if (baij->rowvalues) { 1063606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1064606d414cSSatish Balay baij->rowvalues = 0; 1065606d414cSSatish Balay } 1066bbb85fb3SSatish Balay PetscFunctionReturn(0); 1067bbb85fb3SSatish Balay } 106857b952d6SSatish Balay 10694a2ae208SSatish Balay #undef __FUNCT__ 10704a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1071b0a32e0cSBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 107257b952d6SSatish Balay { 107357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1074fb9695e5SSatish Balay int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 10756831982aSBarry Smith PetscTruth isascii,isdraw; 1076b0a32e0cSBarry Smith PetscViewer sviewer; 1077f3ef73ceSBarry Smith PetscViewerFormat format; 107857b952d6SSatish Balay 1079d64ed03dSBarry Smith PetscFunctionBegin; 1080b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1081fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 10820f5bd95cSBarry Smith if (isascii) { 1083b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1084fb9695e5SSatish Balay if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 10854e220ebcSLois Curfman McInnes MatInfo info; 1086d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1087d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1088b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1089273d9f13SBarry Smith rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10906831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 1091d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1092b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1093d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1094b0a32e0cSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1095b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 109657b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10973a40ed3dSBarry Smith PetscFunctionReturn(0); 1098fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1099b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 11003a40ed3dSBarry Smith PetscFunctionReturn(0); 110157b952d6SSatish Balay } 110257b952d6SSatish Balay } 110357b952d6SSatish Balay 11040f5bd95cSBarry Smith if (isdraw) { 1105b0a32e0cSBarry Smith PetscDraw draw; 110657b952d6SSatish Balay PetscTruth isnull; 1107b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1108b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 110957b952d6SSatish Balay } 111057b952d6SSatish Balay 111157b952d6SSatish Balay if (size == 1) { 1112873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 111357b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1114d64ed03dSBarry Smith } else { 111557b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 111657b952d6SSatish Balay Mat A; 111757b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1118273d9f13SBarry Smith int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11193eda8832SBarry Smith MatScalar *a; 112057b952d6SSatish Balay 112157b952d6SSatish Balay if (!rank) { 112255843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1123d64ed03dSBarry Smith } else { 112455843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 112557b952d6SSatish Balay } 1126b0a32e0cSBarry Smith PetscLogObjectParent(mat,A); 112757b952d6SSatish Balay 112857b952d6SSatish Balay /* copy over the A part */ 112957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 113057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 113182502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 113257b952d6SSatish Balay 113357b952d6SSatish Balay for (i=0; i<mbs; i++) { 113457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 113557b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 113657b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 113757b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 113857b952d6SSatish Balay for (k=0; k<bs; k++) { 113993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1140cee3aa6bSSatish Balay col++; a += bs; 114157b952d6SSatish Balay } 114257b952d6SSatish Balay } 114357b952d6SSatish Balay } 114457b952d6SSatish Balay /* copy over the B part */ 114557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 114657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 114757b952d6SSatish Balay for (i=0; i<mbs; i++) { 114857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 114957b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 115057b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 115157b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 115257b952d6SSatish Balay for (k=0; k<bs; k++) { 115393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1154cee3aa6bSSatish Balay col++; a += bs; 115557b952d6SSatish Balay } 115657b952d6SSatish Balay } 115757b952d6SSatish Balay } 1158606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11596d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11606d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 116155843e3eSBarry Smith /* 116255843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1163b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 116455843e3eSBarry Smith */ 1165b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1166f14a1c24SBarry Smith if (!rank) { 1167e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11686831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 116957b952d6SSatish Balay } 1170b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 117157b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 117257b952d6SSatish Balay } 11733a40ed3dSBarry Smith PetscFunctionReturn(0); 117457b952d6SSatish Balay } 117557b952d6SSatish Balay 11764a2ae208SSatish Balay #undef __FUNCT__ 11774a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1178b0a32e0cSBarry Smith int MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 117957b952d6SSatish Balay { 118057b952d6SSatish Balay int ierr; 11816831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 118257b952d6SSatish Balay 1183d64ed03dSBarry Smith PetscFunctionBegin; 1184b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1185fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1186b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1187fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 11880f5bd95cSBarry Smith if (isascii || isdraw || issocket || isbinary) { 11897b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 11905cd90555SBarry Smith } else { 119129bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 119257b952d6SSatish Balay } 11933a40ed3dSBarry Smith PetscFunctionReturn(0); 119457b952d6SSatish Balay } 119557b952d6SSatish Balay 11964a2ae208SSatish Balay #undef __FUNCT__ 11974a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1198e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 119979bdfe76SSatish Balay { 120079bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 120179bdfe76SSatish Balay int ierr; 120279bdfe76SSatish Balay 1203d64ed03dSBarry Smith PetscFunctionBegin; 1204aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1205b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 120679bdfe76SSatish Balay #endif 12078798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12088798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1209606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 121079bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 121179bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1212aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12130f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 121448e59246SSatish Balay #else 1215606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 121648e59246SSatish Balay #endif 1217606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1218606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1219606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1220606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1221606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1222606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 12236fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 12246fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 12256fa18ffdSBarry Smith #endif 1226606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 12273a40ed3dSBarry Smith PetscFunctionReturn(0); 122879bdfe76SSatish Balay } 122979bdfe76SSatish Balay 12304a2ae208SSatish Balay #undef __FUNCT__ 12314a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1232ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1233cee3aa6bSSatish Balay { 1234cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 123547b4a8eaSLois Curfman McInnes int ierr,nt; 1236cee3aa6bSSatish Balay 1237d64ed03dSBarry Smith PetscFunctionBegin; 1238e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1239273d9f13SBarry Smith if (nt != A->n) { 124029bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 124147b4a8eaSLois Curfman McInnes } 1242e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1243273d9f13SBarry Smith if (nt != A->m) { 124429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 124547b4a8eaSLois Curfman McInnes } 124643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1247f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 124843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1249f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 125043a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12513a40ed3dSBarry Smith PetscFunctionReturn(0); 1252cee3aa6bSSatish Balay } 1253cee3aa6bSSatish Balay 12544a2ae208SSatish Balay #undef __FUNCT__ 12554a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1256ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1257cee3aa6bSSatish Balay { 1258cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1259cee3aa6bSSatish Balay int ierr; 1260d64ed03dSBarry Smith 1261d64ed03dSBarry Smith PetscFunctionBegin; 126243a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1263f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 126443a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1265f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12663a40ed3dSBarry Smith PetscFunctionReturn(0); 1267cee3aa6bSSatish Balay } 1268cee3aa6bSSatish Balay 12694a2ae208SSatish Balay #undef __FUNCT__ 12704a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 12717c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1272cee3aa6bSSatish Balay { 1273cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1274cee3aa6bSSatish Balay int ierr; 1275cee3aa6bSSatish Balay 1276d64ed03dSBarry Smith PetscFunctionBegin; 1277cee3aa6bSSatish Balay /* do nondiagonal part */ 12787c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1279cee3aa6bSSatish Balay /* send it on its way */ 1280537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1281cee3aa6bSSatish Balay /* do local part */ 12827c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1283cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1284cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1285cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1286639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12873a40ed3dSBarry Smith PetscFunctionReturn(0); 1288cee3aa6bSSatish Balay } 1289cee3aa6bSSatish Balay 12904a2ae208SSatish Balay #undef __FUNCT__ 12914a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 12927c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1293cee3aa6bSSatish Balay { 1294cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1295cee3aa6bSSatish Balay int ierr; 1296cee3aa6bSSatish Balay 1297d64ed03dSBarry Smith PetscFunctionBegin; 1298cee3aa6bSSatish Balay /* do nondiagonal part */ 12997c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1300cee3aa6bSSatish Balay /* send it on its way */ 1301537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1302cee3aa6bSSatish Balay /* do local part */ 13037c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1304cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1305cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1306cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1307537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13083a40ed3dSBarry Smith PetscFunctionReturn(0); 1309cee3aa6bSSatish Balay } 1310cee3aa6bSSatish Balay 1311cee3aa6bSSatish Balay /* 1312cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1313cee3aa6bSSatish Balay diagonal block 1314cee3aa6bSSatish Balay */ 13154a2ae208SSatish Balay #undef __FUNCT__ 13164a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1317ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1318cee3aa6bSSatish Balay { 1319cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 13203a40ed3dSBarry Smith int ierr; 1321d64ed03dSBarry Smith 1322d64ed03dSBarry Smith PetscFunctionBegin; 1323273d9f13SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13243a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13253a40ed3dSBarry Smith PetscFunctionReturn(0); 1326cee3aa6bSSatish Balay } 1327cee3aa6bSSatish Balay 13284a2ae208SSatish Balay #undef __FUNCT__ 13294a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1330*87828ca2SBarry Smith int MatScale_MPIBAIJ(PetscScalar *aa,Mat A) 1331cee3aa6bSSatish Balay { 1332cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1333cee3aa6bSSatish Balay int ierr; 1334d64ed03dSBarry Smith 1335d64ed03dSBarry Smith PetscFunctionBegin; 1336cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1337cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 13383a40ed3dSBarry Smith PetscFunctionReturn(0); 1339cee3aa6bSSatish Balay } 1340026e39d0SSatish Balay 13414a2ae208SSatish Balay #undef __FUNCT__ 13424a2ae208SSatish Balay #define __FUNCT__ "MatGetOwnershipRange_MPIBAIJ" 1343ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 134457b952d6SSatish Balay { 134557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1346d64ed03dSBarry Smith 1347d64ed03dSBarry Smith PetscFunctionBegin; 1348273d9f13SBarry Smith if (m) *m = mat->rstart_bs; 1349273d9f13SBarry Smith if (n) *n = mat->rend_bs; 13503a40ed3dSBarry Smith PetscFunctionReturn(0); 135157b952d6SSatish Balay } 135257b952d6SSatish Balay 13534a2ae208SSatish Balay #undef __FUNCT__ 13544a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1355*87828ca2SBarry Smith int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1356acdf5bf4SSatish Balay { 1357acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1358*87828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1359acdf5bf4SSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1360d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1361d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1362acdf5bf4SSatish Balay 1363d64ed03dSBarry Smith PetscFunctionBegin; 136429bbc08cSBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1365acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1366acdf5bf4SSatish Balay 1367acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1368acdf5bf4SSatish Balay /* 1369acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1370acdf5bf4SSatish Balay */ 1371acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1372bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1373bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1374acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1375acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1376acdf5bf4SSatish Balay } 1377*87828ca2SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1378acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1379acdf5bf4SSatish Balay } 1380acdf5bf4SSatish Balay 138129bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1382d9d09a02SSatish Balay lrow = row - brstart; 1383acdf5bf4SSatish Balay 1384acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1385acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1386acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1387f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1388f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1389acdf5bf4SSatish Balay nztot = nzA + nzB; 1390acdf5bf4SSatish Balay 1391acdf5bf4SSatish Balay cmap = mat->garray; 1392acdf5bf4SSatish Balay if (v || idx) { 1393acdf5bf4SSatish Balay if (nztot) { 1394acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1395acdf5bf4SSatish Balay int imark = -1; 1396acdf5bf4SSatish Balay if (v) { 1397acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1398acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1399d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1400acdf5bf4SSatish Balay else break; 1401acdf5bf4SSatish Balay } 1402acdf5bf4SSatish Balay imark = i; 1403acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1404acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1405acdf5bf4SSatish Balay } 1406acdf5bf4SSatish Balay if (idx) { 1407acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1408acdf5bf4SSatish Balay if (imark > -1) { 1409acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1410bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1411acdf5bf4SSatish Balay } 1412acdf5bf4SSatish Balay } else { 1413acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1414d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1415d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1416acdf5bf4SSatish Balay else break; 1417acdf5bf4SSatish Balay } 1418acdf5bf4SSatish Balay imark = i; 1419acdf5bf4SSatish Balay } 1420d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1421d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1422acdf5bf4SSatish Balay } 1423d64ed03dSBarry Smith } else { 1424d212a18eSSatish Balay if (idx) *idx = 0; 1425d212a18eSSatish Balay if (v) *v = 0; 1426d212a18eSSatish Balay } 1427acdf5bf4SSatish Balay } 1428acdf5bf4SSatish Balay *nz = nztot; 1429f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1430f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14313a40ed3dSBarry Smith PetscFunctionReturn(0); 1432acdf5bf4SSatish Balay } 1433acdf5bf4SSatish Balay 14344a2ae208SSatish Balay #undef __FUNCT__ 14354a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1436*87828ca2SBarry Smith int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1437acdf5bf4SSatish Balay { 1438acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1439d64ed03dSBarry Smith 1440d64ed03dSBarry Smith PetscFunctionBegin; 1441acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 144229bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1443acdf5bf4SSatish Balay } 1444acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14453a40ed3dSBarry Smith PetscFunctionReturn(0); 1446acdf5bf4SSatish Balay } 1447acdf5bf4SSatish Balay 14484a2ae208SSatish Balay #undef __FUNCT__ 14494a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1450ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14515a838052SSatish Balay { 14525a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1453d64ed03dSBarry Smith 1454d64ed03dSBarry Smith PetscFunctionBegin; 14555a838052SSatish Balay *bs = baij->bs; 14563a40ed3dSBarry Smith PetscFunctionReturn(0); 14575a838052SSatish Balay } 14585a838052SSatish Balay 14594a2ae208SSatish Balay #undef __FUNCT__ 14604a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1461ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 146258667388SSatish Balay { 146358667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 146458667388SSatish Balay int ierr; 1465d64ed03dSBarry Smith 1466d64ed03dSBarry Smith PetscFunctionBegin; 146758667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 146858667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14693a40ed3dSBarry Smith PetscFunctionReturn(0); 147058667388SSatish Balay } 14710ac07820SSatish Balay 14724a2ae208SSatish Balay #undef __FUNCT__ 14734a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1474ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14750ac07820SSatish Balay { 14764e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14774e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 14787d57db60SLois Curfman McInnes int ierr; 1479329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14800ac07820SSatish Balay 1481d64ed03dSBarry Smith PetscFunctionBegin; 14824e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 14834e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14840e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1485de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14864e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14870e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1488de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14890ac07820SSatish Balay if (flag == MAT_LOCAL) { 14904e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14914e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14924e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14934e220ebcSLois Curfman McInnes info->memory = isend[3]; 14944e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14950ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1496f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 14974e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14984e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14994e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15004e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15014e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15020ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1503f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 15044e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15054e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15064e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15074e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15084e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1509d41123aaSBarry Smith } else { 151029bbc08cSBarry Smith SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 15110ac07820SSatish Balay } 1512273d9f13SBarry Smith info->rows_global = (double)A->M; 1513273d9f13SBarry Smith info->columns_global = (double)A->N; 1514273d9f13SBarry Smith info->rows_local = (double)A->m; 1515273d9f13SBarry Smith info->columns_local = (double)A->N; 15164e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15174e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15184e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15193a40ed3dSBarry Smith PetscFunctionReturn(0); 15200ac07820SSatish Balay } 15210ac07820SSatish Balay 15224a2ae208SSatish Balay #undef __FUNCT__ 15234a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1524ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 152558667388SSatish Balay { 152658667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 152798305bb5SBarry Smith int ierr; 152858667388SSatish Balay 1529d64ed03dSBarry Smith PetscFunctionBegin; 153012c028f9SKris Buschelman switch (op) { 153112c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 153212c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 153312c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 153412c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 153512c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 153612c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 153712c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 153898305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 153998305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 154012c028f9SKris Buschelman break; 154112c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15427c922b88SBarry Smith a->roworiented = PETSC_TRUE; 154398305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154498305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 154512c028f9SKris Buschelman break; 154612c028f9SKris Buschelman case MAT_ROWS_SORTED: 154712c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 154812c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 1549d03495bdSKris Buschelman case MAT_USE_SINGLE_PRECISION_SOLVES: 1550b0a32e0cSBarry Smith PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 155112c028f9SKris Buschelman break; 155212c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15537c922b88SBarry Smith a->roworiented = PETSC_FALSE; 155498305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155598305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155612c028f9SKris Buschelman break; 155712c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15587c922b88SBarry Smith a->donotstash = PETSC_TRUE; 155912c028f9SKris Buschelman break; 156012c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 156129bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 156212c028f9SKris Buschelman break; 156312c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15647c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 156512c028f9SKris Buschelman break; 156612c028f9SKris Buschelman default: 156729bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 156812c028f9SKris Buschelman break; 1569d64ed03dSBarry Smith } 15703a40ed3dSBarry Smith PetscFunctionReturn(0); 157158667388SSatish Balay } 157258667388SSatish Balay 15734a2ae208SSatish Balay #undef __FUNCT__ 15744a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1575ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15760ac07820SSatish Balay { 15770ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15780ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15790ac07820SSatish Balay Mat B; 1580273d9f13SBarry Smith int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 15810ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15823eda8832SBarry Smith MatScalar *a; 15830ac07820SSatish Balay 1584d64ed03dSBarry Smith PetscFunctionBegin; 158529bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1586273d9f13SBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 15870ac07820SSatish Balay 15880ac07820SSatish Balay /* copy over the A part */ 15890ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15900ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 159182502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 15920ac07820SSatish Balay 15930ac07820SSatish Balay for (i=0; i<mbs; i++) { 15940ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15950ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15960ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15970ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15980ac07820SSatish Balay for (k=0; k<bs; k++) { 159993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16000ac07820SSatish Balay col++; a += bs; 16010ac07820SSatish Balay } 16020ac07820SSatish Balay } 16030ac07820SSatish Balay } 16040ac07820SSatish Balay /* copy over the B part */ 16050ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16060ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16070ac07820SSatish Balay for (i=0; i<mbs; i++) { 16080ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16090ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16100ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16110ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16120ac07820SSatish Balay for (k=0; k<bs; k++) { 161393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16140ac07820SSatish Balay col++; a += bs; 16150ac07820SSatish Balay } 16160ac07820SSatish Balay } 16170ac07820SSatish Balay } 1618606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16190ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16200ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16210ac07820SSatish Balay 16227c922b88SBarry Smith if (matout) { 16230ac07820SSatish Balay *matout = B; 16240ac07820SSatish Balay } else { 1625273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16260ac07820SSatish Balay } 16273a40ed3dSBarry Smith PetscFunctionReturn(0); 16280ac07820SSatish Balay } 16290e95ebc0SSatish Balay 16304a2ae208SSatish Balay #undef __FUNCT__ 16314a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 163236c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16330e95ebc0SSatish Balay { 163436c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 163536c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 16360e95ebc0SSatish Balay int ierr,s1,s2,s3; 16370e95ebc0SSatish Balay 1638d64ed03dSBarry Smith PetscFunctionBegin; 163936c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 164036c4a09eSSatish Balay if (rr) { 164136c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 164229bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 164336c4a09eSSatish Balay /* Overlap communication with computation. */ 164436c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 164536c4a09eSSatish Balay } 16460e95ebc0SSatish Balay if (ll) { 16470e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 164829bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1649a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16500e95ebc0SSatish Balay } 165136c4a09eSSatish Balay /* scale the diagonal block */ 165236c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 165336c4a09eSSatish Balay 165436c4a09eSSatish Balay if (rr) { 165536c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 165636c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1657a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 165836c4a09eSSatish Balay } 165936c4a09eSSatish Balay 16603a40ed3dSBarry Smith PetscFunctionReturn(0); 16610e95ebc0SSatish Balay } 16620e95ebc0SSatish Balay 16634a2ae208SSatish Balay #undef __FUNCT__ 16644a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1665*87828ca2SBarry Smith int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag) 16660ac07820SSatish Balay { 16670ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16680ac07820SSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 166935d8aa7fSBarry Smith int *procs,*nprocs,j,idx,nsends,*work,row; 16700ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16710ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1672a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16730ac07820SSatish Balay MPI_Comm comm = A->comm; 16740ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16750ac07820SSatish Balay MPI_Status recv_status,*send_status; 16760ac07820SSatish Balay IS istmp; 167735d8aa7fSBarry Smith PetscTruth found; 16780ac07820SSatish Balay 1679d64ed03dSBarry Smith PetscFunctionBegin; 1680f14a1c24SBarry Smith ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 16810ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 16820ac07820SSatish Balay 16830ac07820SSatish Balay /* first count number of contributors to each processor */ 168482502324SSatish Balay ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1685549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1686549d3d68SSatish Balay procs = nprocs + size; 1687b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 16880ac07820SSatish Balay for (i=0; i<N; i++) { 16890ac07820SSatish Balay idx = rows[i]; 169035d8aa7fSBarry Smith found = PETSC_FALSE; 16910ac07820SSatish Balay for (j=0; j<size; j++) { 16920ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 169335d8aa7fSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 16940ac07820SSatish Balay } 16950ac07820SSatish Balay } 169629bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 16970ac07820SSatish Balay } 16980ac07820SSatish Balay nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 16990ac07820SSatish Balay 17000ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 170182502324SSatish Balay ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 17026831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 17030ac07820SSatish Balay nmax = work[rank]; 17046831982aSBarry Smith nrecvs = work[size+rank]; 1705606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 17060ac07820SSatish Balay 17070ac07820SSatish Balay /* post receives: */ 1708b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1709b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17100ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1711ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17120ac07820SSatish Balay } 17130ac07820SSatish Balay 17140ac07820SSatish Balay /* do sends: 17150ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17160ac07820SSatish Balay the ith processor 17170ac07820SSatish Balay */ 1718b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1719b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1720b0a32e0cSBarry Smith ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 17210ac07820SSatish Balay starts[0] = 0; 17220ac07820SSatish Balay for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 17230ac07820SSatish Balay for (i=0; i<N; i++) { 17240ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17250ac07820SSatish Balay } 17266831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17270ac07820SSatish Balay 17280ac07820SSatish Balay starts[0] = 0; 17290ac07820SSatish Balay for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 17300ac07820SSatish Balay count = 0; 17310ac07820SSatish Balay for (i=0; i<size; i++) { 17320ac07820SSatish Balay if (procs[i]) { 1733ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17340ac07820SSatish Balay } 17350ac07820SSatish Balay } 1736606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17370ac07820SSatish Balay 17380ac07820SSatish Balay base = owners[rank]*bs; 17390ac07820SSatish Balay 17400ac07820SSatish Balay /* wait on receives */ 1741b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 17420ac07820SSatish Balay source = lens + nrecvs; 17430ac07820SSatish Balay count = nrecvs; slen = 0; 17440ac07820SSatish Balay while (count) { 1745ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17460ac07820SSatish Balay /* unpack receives into our local space */ 1747ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17480ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17490ac07820SSatish Balay lens[imdex] = n; 17500ac07820SSatish Balay slen += n; 17510ac07820SSatish Balay count--; 17520ac07820SSatish Balay } 1753606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17540ac07820SSatish Balay 17550ac07820SSatish Balay /* move the data into the send scatter */ 1756b0a32e0cSBarry Smith ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 17570ac07820SSatish Balay count = 0; 17580ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17590ac07820SSatish Balay values = rvalues + i*nmax; 17600ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17610ac07820SSatish Balay lrows[count++] = values[j] - base; 17620ac07820SSatish Balay } 17630ac07820SSatish Balay } 1764606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1765606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1766606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1767606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17680ac07820SSatish Balay 17690ac07820SSatish Balay /* actually zap the local rows */ 1770029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1771b0a32e0cSBarry Smith PetscLogObjectParent(A,istmp); 1772a07cd24cSSatish Balay 177372dacd9aSBarry Smith /* 177472dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 177572dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 177672dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 177772dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 177872dacd9aSBarry Smith 177972dacd9aSBarry Smith Contributed by: Mathew Knepley 178072dacd9aSBarry Smith */ 17819c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 17826fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 17839c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 17846fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 17859c957beeSSatish Balay } else if (diag) { 17866fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1787fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 178829bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1789fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 17906525c446SSatish Balay } 1791a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1792a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 17933f11ea53SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1794a07cd24cSSatish Balay } 1795a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17979c957beeSSatish Balay } else { 17986fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1799a07cd24cSSatish Balay } 18009c957beeSSatish Balay 18019c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1802606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1803a07cd24cSSatish Balay 18040ac07820SSatish Balay /* wait on sends */ 18050ac07820SSatish Balay if (nsends) { 180682502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1807ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1808606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18090ac07820SSatish Balay } 1810606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1811606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18120ac07820SSatish Balay 18133a40ed3dSBarry Smith PetscFunctionReturn(0); 18140ac07820SSatish Balay } 181572dacd9aSBarry Smith 18164a2ae208SSatish Balay #undef __FUNCT__ 18174a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1818ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1819ba4ca20aSSatish Balay { 1820ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 182125fdafccSSatish Balay MPI_Comm comm = A->comm; 1822133cdb44SSatish Balay static int called = 0; 18233a40ed3dSBarry Smith int ierr; 1824ba4ca20aSSatish Balay 1825d64ed03dSBarry Smith PetscFunctionBegin; 18263a40ed3dSBarry Smith if (!a->rank) { 18273a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 182825fdafccSSatish Balay } 182925fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1830d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1831d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18323a40ed3dSBarry Smith PetscFunctionReturn(0); 1833ba4ca20aSSatish Balay } 18340ac07820SSatish Balay 18354a2ae208SSatish Balay #undef __FUNCT__ 18364a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1837ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1838bb5a7306SBarry Smith { 1839bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1840bb5a7306SBarry Smith int ierr; 1841d64ed03dSBarry Smith 1842d64ed03dSBarry Smith PetscFunctionBegin; 1843bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18443a40ed3dSBarry Smith PetscFunctionReturn(0); 1845bb5a7306SBarry Smith } 1846bb5a7306SBarry Smith 18472e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18480ac07820SSatish Balay 18494a2ae208SSatish Balay #undef __FUNCT__ 18504a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 18517fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18527fc3c18eSBarry Smith { 18537fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18547fc3c18eSBarry Smith Mat a,b,c,d; 18557fc3c18eSBarry Smith PetscTruth flg; 18567fc3c18eSBarry Smith int ierr; 18577fc3c18eSBarry Smith 18587fc3c18eSBarry Smith PetscFunctionBegin; 1859273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr); 1860273d9f13SBarry Smith if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 18617fc3c18eSBarry Smith a = matA->A; b = matA->B; 18627fc3c18eSBarry Smith c = matB->A; d = matB->B; 18637fc3c18eSBarry Smith 18647fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18657fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18667fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18677fc3c18eSBarry Smith } 18687fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18697fc3c18eSBarry Smith PetscFunctionReturn(0); 18707fc3c18eSBarry Smith } 18717fc3c18eSBarry Smith 1872273d9f13SBarry Smith 18734a2ae208SSatish Balay #undef __FUNCT__ 18744a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1875273d9f13SBarry Smith int MatSetUpPreallocation_MPIBAIJ(Mat A) 1876273d9f13SBarry Smith { 1877273d9f13SBarry Smith int ierr; 1878273d9f13SBarry Smith 1879273d9f13SBarry Smith PetscFunctionBegin; 1880273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1881273d9f13SBarry Smith PetscFunctionReturn(0); 1882273d9f13SBarry Smith } 1883273d9f13SBarry Smith 188479bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1885cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1886cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1887cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1888cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1889cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1890cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 18917c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 18927c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1893cc2dc46cSBarry Smith 0, 1894cc2dc46cSBarry Smith 0, 1895cc2dc46cSBarry Smith 0, 1896cc2dc46cSBarry Smith 0, 1897cc2dc46cSBarry Smith 0, 1898cc2dc46cSBarry Smith 0, 1899cc2dc46cSBarry Smith 0, 1900cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1901cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 19027fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1903cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1904cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1905cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1906cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1907cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1908cc2dc46cSBarry Smith 0, 1909cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1910cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1911cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1912cc2dc46cSBarry Smith 0, 1913cc2dc46cSBarry Smith 0, 1914cc2dc46cSBarry Smith 0, 1915cc2dc46cSBarry Smith 0, 1916273d9f13SBarry Smith MatSetUpPreallocation_MPIBAIJ, 1917273d9f13SBarry Smith 0, 1918cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1919cc2dc46cSBarry Smith 0, 1920cc2dc46cSBarry Smith 0, 1921cc2dc46cSBarry Smith 0, 1922cc2dc46cSBarry Smith 0, 19232e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1924cc2dc46cSBarry Smith 0, 1925cc2dc46cSBarry Smith 0, 1926cc2dc46cSBarry Smith 0, 1927cc2dc46cSBarry Smith 0, 1928cc2dc46cSBarry Smith 0, 1929cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1930cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1931cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1932cc2dc46cSBarry Smith 0, 1933cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1934cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1935cc2dc46cSBarry Smith 0, 1936cc2dc46cSBarry Smith 0, 1937cc2dc46cSBarry Smith 0, 1938cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1939cc2dc46cSBarry Smith 0, 1940cc2dc46cSBarry Smith 0, 1941cc2dc46cSBarry Smith 0, 1942cc2dc46cSBarry Smith 0, 1943cc2dc46cSBarry Smith 0, 1944cc2dc46cSBarry Smith 0, 1945cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1946cc2dc46cSBarry Smith 0, 1947cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1948cc2dc46cSBarry Smith 0, 1949f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 1950f14a1c24SBarry Smith MatView_MPIBAIJ, 19518a124369SBarry Smith MatGetPetscMaps_Petsc, 19527843d17aSBarry Smith 0, 19537843d17aSBarry Smith 0, 19547843d17aSBarry Smith 0, 19557843d17aSBarry Smith 0, 19567843d17aSBarry Smith 0, 19577843d17aSBarry Smith 0, 19587843d17aSBarry Smith MatGetRowMax_MPIBAIJ}; 195979bdfe76SSatish Balay 19605ef9f2a5SBarry Smith 1961e18c124aSSatish Balay EXTERN_C_BEGIN 19624a2ae208SSatish Balay #undef __FUNCT__ 19634a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 19645ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19655ef9f2a5SBarry Smith { 19665ef9f2a5SBarry Smith PetscFunctionBegin; 19675ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19685ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19695ef9f2a5SBarry Smith PetscFunctionReturn(0); 19705ef9f2a5SBarry Smith } 1971e18c124aSSatish Balay EXTERN_C_END 197279bdfe76SSatish Balay 1973273d9f13SBarry Smith EXTERN_C_BEGIN 19744a2ae208SSatish Balay #undef __FUNCT__ 19754a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 1976273d9f13SBarry Smith int MatCreate_MPIBAIJ(Mat B) 1977273d9f13SBarry Smith { 1978273d9f13SBarry Smith Mat_MPIBAIJ *b; 1979cfce73b9SSatish Balay int ierr; 1980273d9f13SBarry Smith PetscTruth flg; 1981273d9f13SBarry Smith 1982273d9f13SBarry Smith PetscFunctionBegin; 1983273d9f13SBarry Smith 198482502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 198582502324SSatish Balay B->data = (void*)b; 198682502324SSatish Balay 1987273d9f13SBarry Smith ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1988273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1989273d9f13SBarry Smith B->mapping = 0; 1990273d9f13SBarry Smith B->factor = 0; 1991273d9f13SBarry Smith B->assembled = PETSC_FALSE; 1992273d9f13SBarry Smith 1993273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 1994273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1995273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1996273d9f13SBarry Smith 1997273d9f13SBarry Smith /* build local table of row and column ownerships */ 199882502324SSatish Balay ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1999b0a32e0cSBarry Smith PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2000273d9f13SBarry Smith b->cowners = b->rowners + b->size + 2; 2001273d9f13SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2002273d9f13SBarry Smith 2003273d9f13SBarry Smith /* build cache for off array entries formed */ 2004273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2005273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2006273d9f13SBarry Smith b->colmap = PETSC_NULL; 2007273d9f13SBarry Smith b->garray = PETSC_NULL; 2008273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2009273d9f13SBarry Smith 2010cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2011273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 201264a35ccbSBarry Smith b->setvalueslen = 0; 2013273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2014273d9f13SBarry Smith #endif 2015273d9f13SBarry Smith 2016273d9f13SBarry Smith /* stuff used in block assembly */ 2017273d9f13SBarry Smith b->barray = 0; 2018273d9f13SBarry Smith 2019273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2020273d9f13SBarry Smith b->lvec = 0; 2021273d9f13SBarry Smith b->Mvctx = 0; 2022273d9f13SBarry Smith 2023273d9f13SBarry Smith /* stuff for MatGetRow() */ 2024273d9f13SBarry Smith b->rowindices = 0; 2025273d9f13SBarry Smith b->rowvalues = 0; 2026273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2027273d9f13SBarry Smith 2028273d9f13SBarry Smith /* hash table stuff */ 2029273d9f13SBarry Smith b->ht = 0; 2030273d9f13SBarry Smith b->hd = 0; 2031273d9f13SBarry Smith b->ht_size = 0; 2032273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2033273d9f13SBarry Smith b->ht_fact = 0; 2034273d9f13SBarry Smith b->ht_total_ct = 0; 2035273d9f13SBarry Smith b->ht_insert_ct = 0; 2036273d9f13SBarry Smith 2037b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2038273d9f13SBarry Smith if (flg) { 2039273d9f13SBarry Smith double fact = 1.39; 2040273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2041*87828ca2SBarry Smith ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2042273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2043273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2044b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2045273d9f13SBarry Smith } 2046273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2047273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2048273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2049273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2050273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2051273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2052273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2053273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2054273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2055273d9f13SBarry Smith PetscFunctionReturn(0); 2056273d9f13SBarry Smith } 2057273d9f13SBarry Smith EXTERN_C_END 2058273d9f13SBarry Smith 20594a2ae208SSatish Balay #undef __FUNCT__ 20604a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2061273d9f13SBarry Smith /*@C 2062273d9f13SBarry Smith MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2063273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2064273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2065273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2066273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2067273d9f13SBarry Smith 2068273d9f13SBarry Smith Collective on Mat 2069273d9f13SBarry Smith 2070273d9f13SBarry Smith Input Parameters: 2071273d9f13SBarry Smith + A - the matrix 2072273d9f13SBarry Smith . bs - size of blockk 2073273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2074273d9f13SBarry Smith submatrix (same for all local rows) 2075273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2076273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2077273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2078273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2079273d9f13SBarry Smith submatrix (same for all local rows). 2080273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2081273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2082273d9f13SBarry Smith each block row) or PETSC_NULL. 2083273d9f13SBarry Smith 2084273d9f13SBarry Smith Output Parameter: 2085273d9f13SBarry Smith 2086273d9f13SBarry Smith 2087273d9f13SBarry Smith Options Database Keys: 2088273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2089273d9f13SBarry Smith block calculations (much slower) 2090273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2091273d9f13SBarry Smith 2092273d9f13SBarry Smith Notes: 2093273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2094273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2095273d9f13SBarry Smith 2096273d9f13SBarry Smith Storage Information: 2097273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2098273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2099273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2100273d9f13SBarry Smith local matrix (a rectangular submatrix). 2101273d9f13SBarry Smith 2102273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2103273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2104273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2105273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2106273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2107273d9f13SBarry Smith 2108273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2109273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2110273d9f13SBarry Smith 2111273d9f13SBarry Smith .vb 2112273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2113273d9f13SBarry Smith ------------------- 2114273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2115273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2116273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2117273d9f13SBarry Smith ------------------- 2118273d9f13SBarry Smith .ve 2119273d9f13SBarry Smith 2120273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2121273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2122273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2123273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2124273d9f13SBarry Smith 2125273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2126273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2127273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2128273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2129273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2130273d9f13SBarry Smith matrices. 2131273d9f13SBarry Smith 2132273d9f13SBarry Smith Level: intermediate 2133273d9f13SBarry Smith 2134273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2135273d9f13SBarry Smith 2136273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2137273d9f13SBarry Smith @*/ 2138273d9f13SBarry Smith int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2139273d9f13SBarry Smith { 2140273d9f13SBarry Smith Mat_MPIBAIJ *b; 2141273d9f13SBarry Smith int ierr,i; 2142273d9f13SBarry Smith PetscTruth flg2; 2143273d9f13SBarry Smith 2144273d9f13SBarry Smith PetscFunctionBegin; 2145273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 2146273d9f13SBarry Smith if (!flg2) PetscFunctionReturn(0); 214747a75d0bSBarry Smith 2148273d9f13SBarry Smith B->preallocated = PETSC_TRUE; 2149b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2150273d9f13SBarry Smith 2151273d9f13SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2152435da068SBarry Smith if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2153435da068SBarry Smith if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2154435da068SBarry Smith if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2155435da068SBarry Smith if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2156273d9f13SBarry Smith if (d_nnz) { 2157273d9f13SBarry Smith for (i=0; i<B->m/bs; i++) { 2158273d9f13SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2159273d9f13SBarry Smith } 2160273d9f13SBarry Smith } 2161273d9f13SBarry Smith if (o_nnz) { 2162273d9f13SBarry Smith for (i=0; i<B->m/bs; i++) { 2163273d9f13SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2164273d9f13SBarry Smith } 2165273d9f13SBarry Smith } 2166273d9f13SBarry Smith 216747a75d0bSBarry Smith ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 216847a75d0bSBarry Smith ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 21698a124369SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 21708a124369SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 217147a75d0bSBarry Smith 2172273d9f13SBarry Smith b = (Mat_MPIBAIJ*)B->data; 2173273d9f13SBarry Smith b->bs = bs; 2174273d9f13SBarry Smith b->bs2 = bs*bs; 2175273d9f13SBarry Smith b->mbs = B->m/bs; 2176273d9f13SBarry Smith b->nbs = B->n/bs; 2177273d9f13SBarry Smith b->Mbs = B->M/bs; 2178273d9f13SBarry Smith b->Nbs = B->N/bs; 2179273d9f13SBarry Smith 2180273d9f13SBarry Smith ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2181273d9f13SBarry Smith b->rowners[0] = 0; 2182273d9f13SBarry Smith for (i=2; i<=b->size; i++) { 2183273d9f13SBarry Smith b->rowners[i] += b->rowners[i-1]; 2184273d9f13SBarry Smith } 2185273d9f13SBarry Smith b->rstart = b->rowners[b->rank]; 2186273d9f13SBarry Smith b->rend = b->rowners[b->rank+1]; 2187273d9f13SBarry Smith 2188273d9f13SBarry Smith ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2189273d9f13SBarry Smith b->cowners[0] = 0; 2190273d9f13SBarry Smith for (i=2; i<=b->size; i++) { 2191273d9f13SBarry Smith b->cowners[i] += b->cowners[i-1]; 2192273d9f13SBarry Smith } 2193273d9f13SBarry Smith b->cstart = b->cowners[b->rank]; 2194273d9f13SBarry Smith b->cend = b->cowners[b->rank+1]; 2195273d9f13SBarry Smith 2196273d9f13SBarry Smith for (i=0; i<=b->size; i++) { 2197273d9f13SBarry Smith b->rowners_bs[i] = b->rowners[i]*bs; 2198273d9f13SBarry Smith } 2199273d9f13SBarry Smith b->rstart_bs = b->rstart*bs; 2200273d9f13SBarry Smith b->rend_bs = b->rend*bs; 2201273d9f13SBarry Smith b->cstart_bs = b->cstart*bs; 2202273d9f13SBarry Smith b->cend_bs = b->cend*bs; 2203273d9f13SBarry Smith 2204273d9f13SBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2205b0a32e0cSBarry Smith PetscLogObjectParent(B,b->A); 2206273d9f13SBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2207b0a32e0cSBarry Smith PetscLogObjectParent(B,b->B); 2208273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2209273d9f13SBarry Smith 2210273d9f13SBarry Smith PetscFunctionReturn(0); 2211273d9f13SBarry Smith } 2212273d9f13SBarry Smith 22134a2ae208SSatish Balay #undef __FUNCT__ 22144a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 221579bdfe76SSatish Balay /*@C 221679bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 221779bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 221879bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 221979bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 222079bdfe76SSatish Balay performance can be increased by more than a factor of 50. 222179bdfe76SSatish Balay 2222db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2223db81eaa0SLois Curfman McInnes 222479bdfe76SSatish Balay Input Parameters: 2225db81eaa0SLois Curfman McInnes + comm - MPI communicator 222679bdfe76SSatish Balay . bs - size of blockk 222779bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 222892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 222992e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 223092e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 223192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 223292e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2233be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2234be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 223547a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 223679bdfe76SSatish Balay submatrix (same for all local rows) 223747a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 223892e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2239db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 224047a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 224179bdfe76SSatish Balay submatrix (same for all local rows). 224247a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 224392e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 224492e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 224579bdfe76SSatish Balay 224679bdfe76SSatish Balay Output Parameter: 224779bdfe76SSatish Balay . A - the matrix 224879bdfe76SSatish Balay 2249db81eaa0SLois Curfman McInnes Options Database Keys: 2250db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2251db81eaa0SLois Curfman McInnes block calculations (much slower) 2252db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 22533ffaccefSLois Curfman McInnes 2254b259b22eSLois Curfman McInnes Notes: 225547a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 225647a75d0bSBarry Smith 225779bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 225879bdfe76SSatish Balay (possibly both). 225979bdfe76SSatish Balay 2260be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2261be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2262be79a94dSBarry Smith 226379bdfe76SSatish Balay Storage Information: 226479bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 226579bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 226679bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 226779bdfe76SSatish Balay local matrix (a rectangular submatrix). 226879bdfe76SSatish Balay 226979bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 227079bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 227179bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 227279bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 227379bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 227479bdfe76SSatish Balay 227579bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 227679bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 227779bdfe76SSatish Balay 2278db81eaa0SLois Curfman McInnes .vb 2279db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2280db81eaa0SLois Curfman McInnes ------------------- 2281db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2282db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2283db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2284db81eaa0SLois Curfman McInnes ------------------- 2285db81eaa0SLois Curfman McInnes .ve 228679bdfe76SSatish Balay 228779bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 228879bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 228979bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 229057b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 229179bdfe76SSatish Balay 2292d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2293d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 229479bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 229592e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 229692e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 22976da5968aSLois Curfman McInnes matrices. 229879bdfe76SSatish Balay 2299027ccd11SLois Curfman McInnes Level: intermediate 2300027ccd11SLois Curfman McInnes 230192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 230279bdfe76SSatish Balay 23033eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 230479bdfe76SSatish Balay @*/ 2305329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 230679bdfe76SSatish Balay { 2307273d9f13SBarry Smith int ierr,size; 230879bdfe76SSatish Balay 2309d64ed03dSBarry Smith PetscFunctionBegin; 2310273d9f13SBarry Smith ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2311d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2312273d9f13SBarry Smith if (size > 1) { 2313273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2314273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2315273d9f13SBarry Smith } else { 2316273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2317273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 23183914022bSBarry Smith } 23193a40ed3dSBarry Smith PetscFunctionReturn(0); 232079bdfe76SSatish Balay } 2321026e39d0SSatish Balay 23224a2ae208SSatish Balay #undef __FUNCT__ 23234a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 23242e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 23250ac07820SSatish Balay { 23260ac07820SSatish Balay Mat mat; 23270ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2328f1af5d2fSBarry Smith int ierr,len=0; 23290ac07820SSatish Balay 2330d64ed03dSBarry Smith PetscFunctionBegin; 23310ac07820SSatish Balay *newmat = 0; 2332273d9f13SBarry Smith ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2333273d9f13SBarry Smith ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr); 2334273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 23350ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2336273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 23370ac07820SSatish Balay a->bs = oldmat->bs; 23380ac07820SSatish Balay a->bs2 = oldmat->bs2; 23390ac07820SSatish Balay a->mbs = oldmat->mbs; 23400ac07820SSatish Balay a->nbs = oldmat->nbs; 23410ac07820SSatish Balay a->Mbs = oldmat->Mbs; 23420ac07820SSatish Balay a->Nbs = oldmat->Nbs; 23430ac07820SSatish Balay 23440ac07820SSatish Balay a->rstart = oldmat->rstart; 23450ac07820SSatish Balay a->rend = oldmat->rend; 23460ac07820SSatish Balay a->cstart = oldmat->cstart; 23470ac07820SSatish Balay a->cend = oldmat->cend; 23480ac07820SSatish Balay a->size = oldmat->size; 23490ac07820SSatish Balay a->rank = oldmat->rank; 2350aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2351aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2352aef5e8e0SSatish Balay a->rowindices = 0; 23530ac07820SSatish Balay a->rowvalues = 0; 23540ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 235530793edcSSatish Balay a->barray = 0; 23563123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 23573123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 23583123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 23593123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 23600ac07820SSatish Balay 2361133cdb44SSatish Balay /* hash table stuff */ 2362133cdb44SSatish Balay a->ht = 0; 2363133cdb44SSatish Balay a->hd = 0; 2364133cdb44SSatish Balay a->ht_size = 0; 2365133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 236625fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2367133cdb44SSatish Balay a->ht_total_ct = 0; 2368133cdb44SSatish Balay a->ht_insert_ct = 0; 2369133cdb44SSatish Balay 2370549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 23718798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 23728798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 23730ac07820SSatish Balay if (oldmat->colmap) { 2374aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 23750f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 237648e59246SSatish Balay #else 237782502324SSatish Balay ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2378b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2379549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 238048e59246SSatish Balay #endif 23810ac07820SSatish Balay } else a->colmap = 0; 23820ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 238382502324SSatish Balay ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2384b0a32e0cSBarry Smith PetscLogObjectMemory(mat,len*sizeof(int)); 2385549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 23860ac07820SSatish Balay } else a->garray = 0; 23870ac07820SSatish Balay 23880ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2389b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->lvec); 23900ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2391e18c124aSSatish Balay 2392b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->Mvctx); 23932e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2394b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->A); 23952e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2396b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->B); 2397b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 23980ac07820SSatish Balay *newmat = mat; 23993a40ed3dSBarry Smith PetscFunctionReturn(0); 24000ac07820SSatish Balay } 240157b952d6SSatish Balay 2402e090d566SSatish Balay #include "petscsys.h" 240357b952d6SSatish Balay 2404273d9f13SBarry Smith EXTERN_C_BEGIN 24054a2ae208SSatish Balay #undef __FUNCT__ 24064a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2407b0a32e0cSBarry Smith int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 240857b952d6SSatish Balay { 240957b952d6SSatish Balay Mat A; 241057b952d6SSatish Balay int i,nz,ierr,j,rstart,rend,fd; 2411*87828ca2SBarry Smith PetscScalar *vals,*buf; 241257b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 241357b952d6SSatish Balay MPI_Status status; 2414cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 241557b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2416f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 241757b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 241857b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 241957b952d6SSatish Balay 2420d64ed03dSBarry Smith PetscFunctionBegin; 2421b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 242257b952d6SSatish Balay 2423d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2424d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 242557b952d6SSatish Balay if (!rank) { 2426b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2427e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 242829bbc08cSBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2429d64ed03dSBarry Smith if (header[3] < 0) { 243029bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2431d64ed03dSBarry Smith } 24326c5fab8fSBarry Smith } 2433d64ed03dSBarry Smith 2434ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 243557b952d6SSatish Balay M = header[1]; N = header[2]; 243657b952d6SSatish Balay 243729bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 243857b952d6SSatish Balay 243957b952d6SSatish Balay /* 244057b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 244157b952d6SSatish Balay divisible by the blocksize 244257b952d6SSatish Balay */ 244357b952d6SSatish Balay Mbs = M/bs; 244457b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 244557b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 244657b952d6SSatish Balay else Mbs++; 244757b952d6SSatish Balay if (extra_rows &&!rank) { 2448b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 244957b952d6SSatish Balay } 2450537820f0SBarry Smith 245157b952d6SSatish Balay /* determine ownership of all rows */ 245257b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 245357b952d6SSatish Balay m = mbs*bs; 2454b0a32e0cSBarry Smith ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2455cee3aa6bSSatish Balay browners = rowners + size + 1; 2456ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 245757b952d6SSatish Balay rowners[0] = 0; 2458cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2459cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 246057b952d6SSatish Balay rstart = rowners[rank]; 246157b952d6SSatish Balay rend = rowners[rank+1]; 246257b952d6SSatish Balay 246357b952d6SSatish Balay /* distribute row lengths to all processors */ 246482502324SSatish Balay ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 246557b952d6SSatish Balay if (!rank) { 2466b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2467e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 246857b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 246982502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2470cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2471ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2472606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2473d64ed03dSBarry Smith } else { 2474ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 247557b952d6SSatish Balay } 247657b952d6SSatish Balay 247757b952d6SSatish Balay if (!rank) { 247857b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 247982502324SSatish Balay ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2480549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 248157b952d6SSatish Balay for (i=0; i<size; i++) { 248257b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 248357b952d6SSatish Balay procsnz[i] += rowlengths[j]; 248457b952d6SSatish Balay } 248557b952d6SSatish Balay } 2486606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 248757b952d6SSatish Balay 248857b952d6SSatish Balay /* determine max buffer needed and allocate it */ 248957b952d6SSatish Balay maxnz = 0; 249057b952d6SSatish Balay for (i=0; i<size; i++) { 249157b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 249257b952d6SSatish Balay } 249382502324SSatish Balay ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 249457b952d6SSatish Balay 249557b952d6SSatish Balay /* read in my part of the matrix column indices */ 249657b952d6SSatish Balay nz = procsnz[0]; 249782502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 249857b952d6SSatish Balay mycols = ibuf; 2499cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2500e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2501cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2502cee3aa6bSSatish Balay 250357b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 250457b952d6SSatish Balay for (i=1; i<size-1; i++) { 250557b952d6SSatish Balay nz = procsnz[i]; 2506e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2507ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 250857b952d6SSatish Balay } 250957b952d6SSatish Balay /* read in the stuff for the last proc */ 251057b952d6SSatish Balay if (size != 1) { 251157b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2512e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 251357b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2514ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 251557b952d6SSatish Balay } 2516606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2517d64ed03dSBarry Smith } else { 251857b952d6SSatish Balay /* determine buffer space needed for message */ 251957b952d6SSatish Balay nz = 0; 252057b952d6SSatish Balay for (i=0; i<m; i++) { 252157b952d6SSatish Balay nz += locrowlens[i]; 252257b952d6SSatish Balay } 252382502324SSatish Balay ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 252457b952d6SSatish Balay mycols = ibuf; 252557b952d6SSatish Balay /* receive message of column indices*/ 2526ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2527ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 252829bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 252957b952d6SSatish Balay } 253057b952d6SSatish Balay 253157b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 253282502324SSatish Balay ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2533cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 253482502324SSatish Balay ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2535549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 253657b952d6SSatish Balay masked1 = mask + Mbs; 253757b952d6SSatish Balay masked2 = masked1 + Mbs; 253857b952d6SSatish Balay rowcount = 0; nzcount = 0; 253957b952d6SSatish Balay for (i=0; i<mbs; i++) { 254057b952d6SSatish Balay dcount = 0; 254157b952d6SSatish Balay odcount = 0; 254257b952d6SSatish Balay for (j=0; j<bs; j++) { 254357b952d6SSatish Balay kmax = locrowlens[rowcount]; 254457b952d6SSatish Balay for (k=0; k<kmax; k++) { 254557b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 254657b952d6SSatish Balay if (!mask[tmp]) { 254757b952d6SSatish Balay mask[tmp] = 1; 254857b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 254957b952d6SSatish Balay else masked1[dcount++] = tmp; 255057b952d6SSatish Balay } 255157b952d6SSatish Balay } 255257b952d6SSatish Balay rowcount++; 255357b952d6SSatish Balay } 2554cee3aa6bSSatish Balay 255557b952d6SSatish Balay dlens[i] = dcount; 255657b952d6SSatish Balay odlens[i] = odcount; 2557cee3aa6bSSatish Balay 255857b952d6SSatish Balay /* zero out the mask elements we set */ 255957b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 256057b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 256157b952d6SSatish Balay } 2562cee3aa6bSSatish Balay 256357b952d6SSatish Balay /* create our matrix */ 256447a75d0bSBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 256557b952d6SSatish Balay A = *newmat; 25666d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 256757b952d6SSatish Balay 256857b952d6SSatish Balay if (!rank) { 2569*87828ca2SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 257057b952d6SSatish Balay /* read in my part of the matrix numerical values */ 257157b952d6SSatish Balay nz = procsnz[0]; 257257b952d6SSatish Balay vals = buf; 2573cee3aa6bSSatish Balay mycols = ibuf; 2574cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2575e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2576cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2577537820f0SBarry Smith 257857b952d6SSatish Balay /* insert into matrix */ 257957b952d6SSatish Balay jj = rstart*bs; 258057b952d6SSatish Balay for (i=0; i<m; i++) { 2581b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 258257b952d6SSatish Balay mycols += locrowlens[i]; 258357b952d6SSatish Balay vals += locrowlens[i]; 258457b952d6SSatish Balay jj++; 258557b952d6SSatish Balay } 258657b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 258757b952d6SSatish Balay for (i=1; i<size-1; i++) { 258857b952d6SSatish Balay nz = procsnz[i]; 258957b952d6SSatish Balay vals = buf; 2590e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2591ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 259257b952d6SSatish Balay } 259357b952d6SSatish Balay /* the last proc */ 259457b952d6SSatish Balay if (size != 1){ 259557b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2596cee3aa6bSSatish Balay vals = buf; 2597e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 259857b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2599ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 260057b952d6SSatish Balay } 2601606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2602d64ed03dSBarry Smith } else { 260357b952d6SSatish Balay /* receive numeric values */ 2604*87828ca2SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 260557b952d6SSatish Balay 260657b952d6SSatish Balay /* receive message of values*/ 260757b952d6SSatish Balay vals = buf; 2608cee3aa6bSSatish Balay mycols = ibuf; 2609ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2610ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 261129bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 261257b952d6SSatish Balay 261357b952d6SSatish Balay /* insert into matrix */ 261457b952d6SSatish Balay jj = rstart*bs; 2615cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2616b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 261757b952d6SSatish Balay mycols += locrowlens[i]; 261857b952d6SSatish Balay vals += locrowlens[i]; 261957b952d6SSatish Balay jj++; 262057b952d6SSatish Balay } 262157b952d6SSatish Balay } 2622606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2623606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2624606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2625606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2626606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2627606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 26286d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 26296d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 26303a40ed3dSBarry Smith PetscFunctionReturn(0); 263157b952d6SSatish Balay } 2632273d9f13SBarry Smith EXTERN_C_END 263357b952d6SSatish Balay 26344a2ae208SSatish Balay #undef __FUNCT__ 26354a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2636133cdb44SSatish Balay /*@ 2637133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2638133cdb44SSatish Balay 2639133cdb44SSatish Balay Input Parameters: 2640133cdb44SSatish Balay . mat - the matrix 2641133cdb44SSatish Balay . fact - factor 2642133cdb44SSatish Balay 2643fee21e36SBarry Smith Collective on Mat 2644fee21e36SBarry Smith 26458c890885SBarry Smith Level: advanced 26468c890885SBarry Smith 2647133cdb44SSatish Balay Notes: 2648133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2649133cdb44SSatish Balay 2650133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2651133cdb44SSatish Balay 2652133cdb44SSatish Balay .seealso: MatSetOption() 2653133cdb44SSatish Balay @*/ 2654329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2655133cdb44SSatish Balay { 265625fdafccSSatish Balay Mat_MPIBAIJ *baij; 2657273d9f13SBarry Smith int ierr; 2658273d9f13SBarry Smith PetscTruth flg; 2659133cdb44SSatish Balay 2660133cdb44SSatish Balay PetscFunctionBegin; 2661133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 2662273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr); 2663273d9f13SBarry Smith if (!flg) { 266429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only."); 2665133cdb44SSatish Balay } 2666133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2667133cdb44SSatish Balay baij->ht_fact = fact; 2668133cdb44SSatish Balay PetscFunctionReturn(0); 2669133cdb44SSatish Balay } 2670f2a5309cSSatish Balay 26714a2ae208SSatish Balay #undef __FUNCT__ 26724a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2673435da068SBarry Smith int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2674f2a5309cSSatish Balay { 2675f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2676f2a5309cSSatish Balay PetscFunctionBegin; 2677f2a5309cSSatish Balay *Ad = a->A; 2678f2a5309cSSatish Balay *Ao = a->B; 2679195d93cdSBarry Smith *colmap = a->garray; 2680f2a5309cSSatish Balay PetscFunctionReturn(0); 2681f2a5309cSSatish Balay } 2682