1*93fea6afSBarry Smith /*$Id: mpibaij.c,v 1.189 2000/04/09 04:36:25 bsmith Exp bsmith $*/ 279bdfe76SSatish Balay 377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 579bdfe76SSatish Balay 657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 16*93fea6afSBarry Smith extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*); 17*93fea6afSBarry Smith 18*93fea6afSBarry Smith /* UGLY, ugly, ugly 19*93fea6afSBarry Smith When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20*93fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21*93fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22*93fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 23*93fea6afSBarry Smith into the single precision data structures. 24*93fea6afSBarry Smith */ 25*93fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 26*93fea6afSBarry Smith extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27*93fea6afSBarry Smith extern int MatSetValuesBlock_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28*93fea6afSBarry Smith extern int MatSetValuesBlock_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29*93fea6afSBarry Smith #else 30*93fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 31*93fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 32*93fea6afSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 33*93fea6afSBarry Smith #endif 343b2fbd54SBarry Smith 357fc3c18eSBarry Smith EXTERN_C_BEGIN 367fc3c18eSBarry Smith #undef __FUNC__ 37b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ" 387fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat) 397fc3c18eSBarry Smith { 407fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 417fc3c18eSBarry Smith int ierr; 427fc3c18eSBarry Smith 437fc3c18eSBarry Smith PetscFunctionBegin; 447fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 457fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 467fc3c18eSBarry Smith PetscFunctionReturn(0); 477fc3c18eSBarry Smith } 487fc3c18eSBarry Smith EXTERN_C_END 497fc3c18eSBarry Smith 507fc3c18eSBarry Smith EXTERN_C_BEGIN 517fc3c18eSBarry Smith #undef __FUNC__ 52b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ" 537fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat) 547fc3c18eSBarry Smith { 557fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 567fc3c18eSBarry Smith int ierr; 577fc3c18eSBarry Smith 587fc3c18eSBarry Smith PetscFunctionBegin; 597fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 607fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 617fc3c18eSBarry Smith PetscFunctionReturn(0); 627fc3c18eSBarry Smith } 637fc3c18eSBarry Smith EXTERN_C_END 647fc3c18eSBarry Smith 65537820f0SBarry Smith /* 66537820f0SBarry Smith Local utility routine that creates a mapping from the global column 6757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 6857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 6957b952d6SSatish Balay length of colmap equals the global matrix length. 7057b952d6SSatish Balay */ 715615d1e5SSatish Balay #undef __FUNC__ 72b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private" 7357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 7457b952d6SSatish Balay { 7557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 7657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 77dc2900e9SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 7857b952d6SSatish Balay 79d64ed03dSBarry Smith PetscFunctionBegin; 80aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 810f5bd95cSBarry Smith ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 8248e59246SSatish Balay for (i=0; i<nbs; i++){ 830f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 8448e59246SSatish Balay } 8548e59246SSatish Balay #else 86758f045eSSatish Balay baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 8757b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 88549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 89928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 9048e59246SSatish Balay #endif 913a40ed3dSBarry Smith PetscFunctionReturn(0); 9257b952d6SSatish Balay } 9357b952d6SSatish Balay 9480c1aa95SSatish Balay #define CHUNKSIZE 10 9580c1aa95SSatish Balay 96f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 9780c1aa95SSatish Balay { \ 9880c1aa95SSatish Balay \ 9980c1aa95SSatish Balay brow = row/bs; \ 10080c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 101ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 10280c1aa95SSatish Balay bcol = col/bs; \ 10380c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 104ab26458aSBarry Smith low = 0; high = nrow; \ 105ab26458aSBarry Smith while (high-low > 3) { \ 106ab26458aSBarry Smith t = (low+high)/2; \ 107ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 108ab26458aSBarry Smith else low = t; \ 109ab26458aSBarry Smith } \ 110ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11180c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 11280c1aa95SSatish Balay if (rp[_i] == bcol) { \ 11380c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 114eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 115eada6651SSatish Balay else *bap = value; \ 116ac7a638eSSatish Balay goto a_noinsert; \ 11780c1aa95SSatish Balay } \ 11880c1aa95SSatish Balay } \ 11989280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 120a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 12180c1aa95SSatish Balay if (nrow >= rmax) { \ 12280c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 12380c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1243eda8832SBarry Smith MatScalar *new_a; \ 12580c1aa95SSatish Balay \ 126a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 12789280ab3SLois Curfman McInnes \ 12880c1aa95SSatish Balay /* malloc new storage space */ \ 1293eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 1303eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 13180c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 13280c1aa95SSatish Balay new_i = new_j + new_nz; \ 13380c1aa95SSatish Balay \ 13480c1aa95SSatish Balay /* copy over old data into new slots */ \ 13580c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 13680c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 137549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 13880c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1393eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1403eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 142549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1433eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 14480c1aa95SSatish Balay /* free up old matrix storage */ \ 145606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 146606d414cSSatish Balay if (!a->singlemalloc) { \ 147606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 148606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 149606d414cSSatish Balay } \ 15080c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1517c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 15280c1aa95SSatish Balay \ 15380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 154ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 1553eda8832SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 15680c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 15780c1aa95SSatish Balay a->reallocs++; \ 15880c1aa95SSatish Balay a->nz++; \ 15980c1aa95SSatish Balay } \ 16080c1aa95SSatish Balay N = nrow++ - 1; \ 16180c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 16280c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 16380c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1643eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16580c1aa95SSatish Balay } \ 1663eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 16780c1aa95SSatish Balay rp[_i] = bcol; \ 16880c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 169ac7a638eSSatish Balay a_noinsert:; \ 17080c1aa95SSatish Balay ailen[brow] = nrow; \ 17180c1aa95SSatish Balay } 17257b952d6SSatish Balay 173ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 174ac7a638eSSatish Balay { \ 175ac7a638eSSatish Balay brow = row/bs; \ 176ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 177ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 178ac7a638eSSatish Balay bcol = col/bs; \ 179ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 180ac7a638eSSatish Balay low = 0; high = nrow; \ 181ac7a638eSSatish Balay while (high-low > 3) { \ 182ac7a638eSSatish Balay t = (low+high)/2; \ 183ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 184ac7a638eSSatish Balay else low = t; \ 185ac7a638eSSatish Balay } \ 186ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 187ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 188ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 189ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 190ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 191ac7a638eSSatish Balay else *bap = value; \ 192ac7a638eSSatish Balay goto b_noinsert; \ 193ac7a638eSSatish Balay } \ 194ac7a638eSSatish Balay } \ 19589280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 196a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 197ac7a638eSSatish Balay if (nrow >= rmax) { \ 198ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 19974c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2003eda8832SBarry Smith MatScalar *new_a; \ 201ac7a638eSSatish Balay \ 202a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 20389280ab3SLois Curfman McInnes \ 204ac7a638eSSatish Balay /* malloc new storage space */ \ 2053eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 2063eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 207ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 208ac7a638eSSatish Balay new_i = new_j + new_nz; \ 209ac7a638eSSatish Balay \ 210ac7a638eSSatish Balay /* copy over old data into new slots */ \ 211ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 21274c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 213549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 214ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2153eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2163eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2173eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 218549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2193eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 220ac7a638eSSatish Balay /* free up old matrix storage */ \ 221606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 222606d414cSSatish Balay if (!b->singlemalloc) { \ 223606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 224606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 225606d414cSSatish Balay } \ 22674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2277c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 228ac7a638eSSatish Balay \ 229ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 230ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 2313eda8832SBarry Smith PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 23274c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 23374c639caSSatish Balay b->reallocs++; \ 23474c639caSSatish Balay b->nz++; \ 235ac7a638eSSatish Balay } \ 236ac7a638eSSatish Balay N = nrow++ - 1; \ 237ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 238ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 239ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2403eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 241ac7a638eSSatish Balay } \ 2423eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 243ac7a638eSSatish Balay rp[_i] = bcol; \ 244ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 245ac7a638eSSatish Balay b_noinsert:; \ 246ac7a638eSSatish Balay bilen[brow] = nrow; \ 247ac7a638eSSatish Balay } 248ac7a638eSSatish Balay 249*93fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2505615d1e5SSatish Balay #undef __FUNC__ 251b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 252ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 25357b952d6SSatish Balay { 254*93fea6afSBarry Smith int ierr,i,N = m*n; 255*93fea6afSBarry Smith MatScalar *vsingle = (MatScalar*)v; 256*93fea6afSBarry Smith 257*93fea6afSBarry Smith PetscFunctionBegin; 258*93fea6afSBarry Smith for (i=0; i<N; i++) { 259*93fea6afSBarry Smith vsingle[i] = v[i]; 260*93fea6afSBarry Smith } 261*93fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 262*93fea6afSBarry Smith PetscFunctionReturn(0); 263*93fea6afSBarry Smith } 264*93fea6afSBarry Smith 265*93fea6afSBarry Smith #undef __FUNC__ 266*93fea6afSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 267*93fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 268*93fea6afSBarry Smith { 269*93fea6afSBarry Smith int ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2; 270*93fea6afSBarry Smith MatScalar *vsingle = (MatScalar*)v; 271*93fea6afSBarry Smith 272*93fea6afSBarry Smith PetscFunctionBegin; 273*93fea6afSBarry Smith for (i=0; i<N; i++) { 274*93fea6afSBarry Smith vsingle[i] = v[i]; 275*93fea6afSBarry Smith } 276*93fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277*93fea6afSBarry Smith PetscFunctionReturn(0); 278*93fea6afSBarry Smith } 279*93fea6afSBarry Smith #endif 280*93fea6afSBarry Smith 281*93fea6afSBarry Smith #undef __FUNC__ 282*93fea6afSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 283*93fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 284*93fea6afSBarry Smith { 28557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 286*93fea6afSBarry Smith MatScalar value; 2874fa0d573SSatish Balay int ierr,i,j,row,col; 2884fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2894fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2904fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 29157b952d6SSatish Balay 292eada6651SSatish Balay /* Some Variables required in the macro */ 29380c1aa95SSatish Balay Mat A = baij->A; 29480c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 295ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 2963eda8832SBarry Smith MatScalar *aa=a->a; 297ac7a638eSSatish Balay 298ac7a638eSSatish Balay Mat B = baij->B; 299ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 300ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3013eda8832SBarry Smith MatScalar *ba=b->a; 302ac7a638eSSatish Balay 303ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 304ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 3053eda8832SBarry Smith MatScalar *ap,*bap; 30680c1aa95SSatish Balay 307d64ed03dSBarry Smith PetscFunctionBegin; 30857b952d6SSatish Balay for (i=0; i<m; i++) { 3095ef9f2a5SBarry Smith if (im[i] < 0) continue; 310aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 311a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 312639f9d9dSBarry Smith #endif 31357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 31457b952d6SSatish Balay row = im[i] - rstart_orig; 31557b952d6SSatish Balay for (j=0; j<n; j++) { 31657b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 31757b952d6SSatish Balay col = in[j] - cstart_orig; 31857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 319f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 32080c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 32173959e64SBarry Smith } else if (in[j] < 0) continue; 322aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 323a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 324639f9d9dSBarry Smith #endif 32557b952d6SSatish Balay else { 32657b952d6SSatish Balay if (mat->was_assembled) { 327905e6a2fSBarry Smith if (!baij->colmap) { 328905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 329905e6a2fSBarry Smith } 330aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3310f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 332fa46199cSSatish Balay col = col - 1 + in[j]%bs; 33348e59246SSatish Balay #else 334905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 33548e59246SSatish Balay #endif 33657b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 33757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 33857b952d6SSatish Balay col = in[j]; 3399bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 3409bf004c3SSatish Balay B = baij->B; 3419bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 3429bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 3439bf004c3SSatish Balay ba=b->a; 34457b952d6SSatish Balay } 345d64ed03dSBarry Smith } else col = in[j]; 34657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 347ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 348ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 34957b952d6SSatish Balay } 35057b952d6SSatish Balay } 351d64ed03dSBarry Smith } else { 35290f02eecSBarry Smith if (!baij->donotstash) { 353ff2fd236SBarry Smith if (roworiented) { 3548798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 355ff2fd236SBarry Smith } else { 3568798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 35757b952d6SSatish Balay } 35857b952d6SSatish Balay } 35957b952d6SSatish Balay } 36090f02eecSBarry Smith } 3613a40ed3dSBarry Smith PetscFunctionReturn(0); 36257b952d6SSatish Balay } 36357b952d6SSatish Balay 364ab26458aSBarry Smith #undef __FUNC__ 365b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 366*93fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 367ab26458aSBarry Smith { 368ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 369*93fea6afSBarry Smith MatScalar *value,*barray=baij->barray; 3707ef1d9bdSSatish Balay int ierr,i,j,ii,jj,row,col; 371ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 372ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 373ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 374ab26458aSBarry Smith 375b16ae2b1SBarry Smith PetscFunctionBegin; 37630793edcSSatish Balay if(!barray) { 377*93fea6afSBarry Smith baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 37830793edcSSatish Balay } 37930793edcSSatish Balay 380ab26458aSBarry Smith if (roworiented) { 381ab26458aSBarry Smith stepval = (n-1)*bs; 382ab26458aSBarry Smith } else { 383ab26458aSBarry Smith stepval = (m-1)*bs; 384ab26458aSBarry Smith } 385ab26458aSBarry Smith for (i=0; i<m; i++) { 3865ef9f2a5SBarry Smith if (im[i] < 0) continue; 387aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 3885ef9f2a5SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 389ab26458aSBarry Smith #endif 390ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 391ab26458aSBarry Smith row = im[i] - rstart; 392ab26458aSBarry Smith for (j=0; j<n; j++) { 39315b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 39415b57d14SSatish Balay if ((roworiented) && (n == 1)) { 39515b57d14SSatish Balay barray = v + i*bs2; 39615b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 39715b57d14SSatish Balay barray = v + j*bs2; 39815b57d14SSatish Balay } else { /* Here a copy is required */ 399ab26458aSBarry Smith if (roworiented) { 400ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 401ab26458aSBarry Smith } else { 402ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 403abef11f7SSatish Balay } 40447513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 40547513183SBarry Smith for (jj=0; jj<bs; jj++) { 40630793edcSSatish Balay *barray++ = *value++; 40747513183SBarry Smith } 40847513183SBarry Smith } 40930793edcSSatish Balay barray -=bs2; 41015b57d14SSatish Balay } 411abef11f7SSatish Balay 412abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 413abef11f7SSatish Balay col = in[j] - cstart; 414*93fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 415ab26458aSBarry Smith } 4165ef9f2a5SBarry Smith else if (in[j] < 0) continue; 417aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4185ef9f2a5SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 419ab26458aSBarry Smith #endif 420ab26458aSBarry Smith else { 421ab26458aSBarry Smith if (mat->was_assembled) { 422ab26458aSBarry Smith if (!baij->colmap) { 423ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 424ab26458aSBarry Smith } 425a5eb4965SSatish Balay 426aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 427aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 428fa46199cSSatish Balay { int data; 4290f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4300f5bd95cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 431fa46199cSSatish Balay } 43248e59246SSatish Balay #else 4330f5bd95cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 434a5eb4965SSatish Balay #endif 43548e59246SSatish Balay #endif 436aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4370f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 438fa46199cSSatish Balay col = (col - 1)/bs; 43948e59246SSatish Balay #else 440a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 44148e59246SSatish Balay #endif 442ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 443ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 444ab26458aSBarry Smith col = in[j]; 445ab26458aSBarry Smith } 446ab26458aSBarry Smith } 447ab26458aSBarry Smith else col = in[j]; 448*93fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 449ab26458aSBarry Smith } 450ab26458aSBarry Smith } 451d64ed03dSBarry Smith } else { 452ab26458aSBarry Smith if (!baij->donotstash) { 453ff2fd236SBarry Smith if (roworiented) { 4548798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 455ff2fd236SBarry Smith } else { 4568798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 457ff2fd236SBarry Smith } 458abef11f7SSatish Balay } 459ab26458aSBarry Smith } 460ab26458aSBarry Smith } 4613a40ed3dSBarry Smith PetscFunctionReturn(0); 462ab26458aSBarry Smith } 4630bdbc534SSatish Balay #define HASH_KEY 0.6180339887 464c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 465c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 466c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 4675615d1e5SSatish Balay #undef __FUNC__ 468b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT" 4690bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4700bdbc534SSatish Balay { 4710bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 4720bdbc534SSatish Balay int ierr,i,j,row,col; 4730bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 474c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 475c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 476329f5518SBarry Smith PetscReal tmp; 4773eda8832SBarry Smith MatScalar ** HD = baij->hd,value; 478aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4794a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4804a15367fSSatish Balay #endif 4810bdbc534SSatish Balay 4820bdbc534SSatish Balay PetscFunctionBegin; 4830bdbc534SSatish Balay 4840bdbc534SSatish Balay for (i=0; i<m; i++) { 485aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4860bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4870bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4880bdbc534SSatish Balay #endif 4890bdbc534SSatish Balay row = im[i]; 490c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4910bdbc534SSatish Balay for (j=0; j<n; j++) { 4920bdbc534SSatish Balay col = in[j]; 4933eda8832SBarry Smith if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m]; 4940bdbc534SSatish Balay /* Look up into the Hash Table */ 495c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 496c2760754SSatish Balay h1 = HASH(size,key,tmp); 4970bdbc534SSatish Balay 498c2760754SSatish Balay 499c2760754SSatish Balay idx = h1; 500aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 501187ce0cbSSatish Balay insert_ct++; 502187ce0cbSSatish Balay total_ct++; 503187ce0cbSSatish Balay if (HT[idx] != key) { 504187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 505187ce0cbSSatish Balay if (idx == size) { 506187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 507187ce0cbSSatish Balay if (idx == h1) { 508187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 509187ce0cbSSatish Balay } 510187ce0cbSSatish Balay } 511187ce0cbSSatish Balay } 512187ce0cbSSatish Balay #else 513c2760754SSatish Balay if (HT[idx] != key) { 514c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 515c2760754SSatish Balay if (idx == size) { 516c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 517c2760754SSatish Balay if (idx == h1) { 518c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 519c2760754SSatish Balay } 520c2760754SSatish Balay } 521c2760754SSatish Balay } 522187ce0cbSSatish Balay #endif 523c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 524c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 525c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5260bdbc534SSatish Balay } 5270bdbc534SSatish Balay } else { 5280bdbc534SSatish Balay if (!baij->donotstash) { 529ff2fd236SBarry Smith if (roworiented) { 5308798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 531ff2fd236SBarry Smith } else { 5328798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5330bdbc534SSatish Balay } 5340bdbc534SSatish Balay } 5350bdbc534SSatish Balay } 5360bdbc534SSatish Balay } 537aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 538187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 539187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 540187ce0cbSSatish Balay #endif 5410bdbc534SSatish Balay PetscFunctionReturn(0); 5420bdbc534SSatish Balay } 5430bdbc534SSatish Balay 5440bdbc534SSatish Balay #undef __FUNC__ 545b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 5460bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 5470bdbc534SSatish Balay { 5480bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 5498798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 5500bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 551b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 552c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 553329f5518SBarry Smith PetscReal tmp; 5543eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 5553eda8832SBarry Smith Scalar *v_t,*value; 556aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5574a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5584a15367fSSatish Balay #endif 5590bdbc534SSatish Balay 560d0a41580SSatish Balay PetscFunctionBegin; 561d0a41580SSatish Balay 5620bdbc534SSatish Balay if (roworiented) { 5630bdbc534SSatish Balay stepval = (n-1)*bs; 5640bdbc534SSatish Balay } else { 5650bdbc534SSatish Balay stepval = (m-1)*bs; 5660bdbc534SSatish Balay } 5670bdbc534SSatish Balay for (i=0; i<m; i++) { 568aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5690bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 5700bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 5710bdbc534SSatish Balay #endif 5720bdbc534SSatish Balay row = im[i]; 573187ce0cbSSatish Balay v_t = v + i*bs2; 574c2760754SSatish Balay if (row >= rstart && row < rend) { 5750bdbc534SSatish Balay for (j=0; j<n; j++) { 5760bdbc534SSatish Balay col = in[j]; 5770bdbc534SSatish Balay 5780bdbc534SSatish Balay /* Look up into the Hash Table */ 579c2760754SSatish Balay key = row*Nbs+col+1; 580c2760754SSatish Balay h1 = HASH(size,key,tmp); 5810bdbc534SSatish Balay 582c2760754SSatish Balay idx = h1; 583aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 584187ce0cbSSatish Balay total_ct++; 585187ce0cbSSatish Balay insert_ct++; 586187ce0cbSSatish Balay if (HT[idx] != key) { 587187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 588187ce0cbSSatish Balay if (idx == size) { 589187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 590187ce0cbSSatish Balay if (idx == h1) { 591187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 592187ce0cbSSatish Balay } 593187ce0cbSSatish Balay } 594187ce0cbSSatish Balay } 595187ce0cbSSatish Balay #else 596c2760754SSatish Balay if (HT[idx] != key) { 597c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 598c2760754SSatish Balay if (idx == size) { 599c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 600c2760754SSatish Balay if (idx == h1) { 601c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 602c2760754SSatish Balay } 603c2760754SSatish Balay } 604c2760754SSatish Balay } 605187ce0cbSSatish Balay #endif 606c2760754SSatish Balay baij_a = HD[idx]; 6070bdbc534SSatish Balay if (roworiented) { 608c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 609187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 610187ce0cbSSatish Balay value = v_t; 611187ce0cbSSatish Balay v_t += bs; 612fef45726SSatish Balay if (addv == ADD_VALUES) { 613c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 614c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 615fef45726SSatish Balay baij_a[jj] += *value++; 616b4cc0f5aSSatish Balay } 617b4cc0f5aSSatish Balay } 618fef45726SSatish Balay } else { 619c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 620c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 621fef45726SSatish Balay baij_a[jj] = *value++; 622fef45726SSatish Balay } 623fef45726SSatish Balay } 624fef45726SSatish Balay } 6250bdbc534SSatish Balay } else { 6260bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 627fef45726SSatish Balay if (addv == ADD_VALUES) { 628b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6290bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 630fef45726SSatish Balay baij_a[jj] += *value++; 631fef45726SSatish Balay } 632fef45726SSatish Balay } 633fef45726SSatish Balay } else { 634fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 635fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 636fef45726SSatish Balay baij_a[jj] = *value++; 637fef45726SSatish Balay } 638b4cc0f5aSSatish Balay } 6390bdbc534SSatish Balay } 6400bdbc534SSatish Balay } 6410bdbc534SSatish Balay } 6420bdbc534SSatish Balay } else { 6430bdbc534SSatish Balay if (!baij->donotstash) { 6440bdbc534SSatish Balay if (roworiented) { 6458798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6460bdbc534SSatish Balay } else { 6478798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6480bdbc534SSatish Balay } 6490bdbc534SSatish Balay } 6500bdbc534SSatish Balay } 6510bdbc534SSatish Balay } 652aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 653187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 654187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 655187ce0cbSSatish Balay #endif 6560bdbc534SSatish Balay PetscFunctionReturn(0); 6570bdbc534SSatish Balay } 658133cdb44SSatish Balay 6590bdbc534SSatish Balay #undef __FUNC__ 660b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 661ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 662d6de1c52SSatish Balay { 663d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 664d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 66548e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 666d6de1c52SSatish Balay 667133cdb44SSatish Balay PetscFunctionBegin; 668d6de1c52SSatish Balay for (i=0; i<m; i++) { 669a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 670a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 671d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 672d6de1c52SSatish Balay row = idxm[i] - bsrstart; 673d6de1c52SSatish Balay for (j=0; j<n; j++) { 674a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 675a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 676d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 677d6de1c52SSatish Balay col = idxn[j] - bscstart; 67898dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 679d64ed03dSBarry Smith } else { 680905e6a2fSBarry Smith if (!baij->colmap) { 681905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 682905e6a2fSBarry Smith } 683aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 6840f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 685fa46199cSSatish Balay data --; 68648e59246SSatish Balay #else 68748e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 68848e59246SSatish Balay #endif 68948e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 690d9d09a02SSatish Balay else { 69148e59246SSatish Balay col = data + idxn[j]%bs; 69298dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 693d6de1c52SSatish Balay } 694d6de1c52SSatish Balay } 695d6de1c52SSatish Balay } 696d64ed03dSBarry Smith } else { 697a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 698d6de1c52SSatish Balay } 699d6de1c52SSatish Balay } 7003a40ed3dSBarry Smith PetscFunctionReturn(0); 701d6de1c52SSatish Balay } 702d6de1c52SSatish Balay 7035615d1e5SSatish Balay #undef __FUNC__ 704b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 705329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 706d6de1c52SSatish Balay { 707d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 708d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 709acdf5bf4SSatish Balay int ierr,i,bs2=baij->bs2; 710329f5518SBarry Smith PetscReal sum = 0.0; 7113eda8832SBarry Smith MatScalar *v; 712d6de1c52SSatish Balay 713d64ed03dSBarry Smith PetscFunctionBegin; 714d6de1c52SSatish Balay if (baij->size == 1) { 715d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 716d6de1c52SSatish Balay } else { 717d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 718d6de1c52SSatish Balay v = amat->a; 719d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 720aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 721329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 722d6de1c52SSatish Balay #else 723d6de1c52SSatish Balay sum += (*v)*(*v); v++; 724d6de1c52SSatish Balay #endif 725d6de1c52SSatish Balay } 726d6de1c52SSatish Balay v = bmat->a; 727d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 728aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 729329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 730d6de1c52SSatish Balay #else 731d6de1c52SSatish Balay sum += (*v)*(*v); v++; 732d6de1c52SSatish Balay #endif 733d6de1c52SSatish Balay } 734ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 735d6de1c52SSatish Balay *norm = sqrt(*norm); 736d64ed03dSBarry Smith } else { 737e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 738d6de1c52SSatish Balay } 739d64ed03dSBarry Smith } 7403a40ed3dSBarry Smith PetscFunctionReturn(0); 741d6de1c52SSatish Balay } 74257b952d6SSatish Balay 743bd7f49f5SSatish Balay 744fef45726SSatish Balay /* 745fef45726SSatish Balay Creates the hash table, and sets the table 746fef45726SSatish Balay This table is created only once. 747fef45726SSatish Balay If new entried need to be added to the matrix 748fef45726SSatish Balay then the hash table has to be destroyed and 749fef45726SSatish Balay recreated. 750fef45726SSatish Balay */ 751fef45726SSatish Balay #undef __FUNC__ 752b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 753329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 754596b8d2eSBarry Smith { 755596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 756596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 757596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 7580bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 759549d3d68SSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 760187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 761fef45726SSatish Balay int *HT,key; 7623eda8832SBarry Smith MatScalar **HD; 763329f5518SBarry Smith PetscReal tmp; 764aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 7654a15367fSSatish Balay int ct=0,max=0; 7664a15367fSSatish Balay #endif 767fef45726SSatish Balay 768d64ed03dSBarry Smith PetscFunctionBegin; 7690bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 7700bdbc534SSatish Balay size = baij->ht_size; 771fef45726SSatish Balay 7720bdbc534SSatish Balay if (baij->ht) { 7730bdbc534SSatish Balay PetscFunctionReturn(0); 774596b8d2eSBarry Smith } 7750bdbc534SSatish Balay 776fef45726SSatish Balay /* Allocate Memory for Hash Table */ 7773eda8832SBarry Smith baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 778b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 779b9e4cc15SSatish Balay HD = baij->hd; 780a07cd24cSSatish Balay HT = baij->ht; 781b9e4cc15SSatish Balay 782b9e4cc15SSatish Balay 783549d3d68SSatish Balay ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 7840bdbc534SSatish Balay 785596b8d2eSBarry Smith 786596b8d2eSBarry Smith /* Loop Over A */ 7870bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 788596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 7890bdbc534SSatish Balay row = i+rstart; 7900bdbc534SSatish Balay col = aj[j]+cstart; 791596b8d2eSBarry Smith 792187ce0cbSSatish Balay key = row*Nbs + col + 1; 793c2760754SSatish Balay h1 = HASH(size,key,tmp); 7940bdbc534SSatish Balay for (k=0; k<size; k++){ 7950bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7960bdbc534SSatish Balay HT[(h1+k)%size] = key; 7970bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 798596b8d2eSBarry Smith break; 799aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 800187ce0cbSSatish Balay } else { 801187ce0cbSSatish Balay ct++; 802187ce0cbSSatish Balay #endif 803596b8d2eSBarry Smith } 804187ce0cbSSatish Balay } 805aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 806187ce0cbSSatish Balay if (k> max) max = k; 807187ce0cbSSatish Balay #endif 808596b8d2eSBarry Smith } 809596b8d2eSBarry Smith } 810596b8d2eSBarry Smith /* Loop Over B */ 8110bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 812596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 8130bdbc534SSatish Balay row = i+rstart; 8140bdbc534SSatish Balay col = garray[bj[j]]; 815187ce0cbSSatish Balay key = row*Nbs + col + 1; 816c2760754SSatish Balay h1 = HASH(size,key,tmp); 8170bdbc534SSatish Balay for (k=0; k<size; k++){ 8180bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8190bdbc534SSatish Balay HT[(h1+k)%size] = key; 8200bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 821596b8d2eSBarry Smith break; 822aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 823187ce0cbSSatish Balay } else { 824187ce0cbSSatish Balay ct++; 825187ce0cbSSatish Balay #endif 826596b8d2eSBarry Smith } 827187ce0cbSSatish Balay } 828aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 829187ce0cbSSatish Balay if (k> max) max = k; 830187ce0cbSSatish Balay #endif 831596b8d2eSBarry Smith } 832596b8d2eSBarry Smith } 833596b8d2eSBarry Smith 834596b8d2eSBarry Smith /* Print Summary */ 835aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 836c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 837596b8d2eSBarry Smith if (HT[i]) {j++;} 838c38d4ed2SBarry Smith } 839329f5518SBarry Smith PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 840187ce0cbSSatish Balay #endif 8413a40ed3dSBarry Smith PetscFunctionReturn(0); 842596b8d2eSBarry Smith } 84357b952d6SSatish Balay 844bbb85fb3SSatish Balay #undef __FUNC__ 845b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 846bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 847bbb85fb3SSatish Balay { 848bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 849ff2fd236SBarry Smith int ierr,nstash,reallocs; 850bbb85fb3SSatish Balay InsertMode addv; 851bbb85fb3SSatish Balay 852bbb85fb3SSatish Balay PetscFunctionBegin; 853bbb85fb3SSatish Balay if (baij->donotstash) { 854bbb85fb3SSatish Balay PetscFunctionReturn(0); 855bbb85fb3SSatish Balay } 856bbb85fb3SSatish Balay 857bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 858bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 859bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 860bbb85fb3SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 861bbb85fb3SSatish Balay } 862bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 863bbb85fb3SSatish Balay 8648798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 8658798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 8668798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 8675a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 8688798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 8695a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 870bbb85fb3SSatish Balay PetscFunctionReturn(0); 871bbb85fb3SSatish Balay } 872bbb85fb3SSatish Balay 873bbb85fb3SSatish Balay #undef __FUNC__ 874b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 875bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 876bbb85fb3SSatish Balay { 877bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 878a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 879a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 8807c922b88SBarry Smith int *row,*col,other_disassembled; 8817c922b88SBarry Smith PetscTruth r1,r2,r3; 8823eda8832SBarry Smith MatScalar *val; 883bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 884bbb85fb3SSatish Balay 885bbb85fb3SSatish Balay PetscFunctionBegin; 886bbb85fb3SSatish Balay if (!baij->donotstash) { 887a2d1c673SSatish Balay while (1) { 8888798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 889a2d1c673SSatish Balay if (!flg) break; 890a2d1c673SSatish Balay 891bbb85fb3SSatish Balay for (i=0; i<n;) { 892bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 893bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 894bbb85fb3SSatish Balay if (j < n) ncols = j-i; 895bbb85fb3SSatish Balay else ncols = n-i; 896bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 897*93fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 898bbb85fb3SSatish Balay i = j; 899bbb85fb3SSatish Balay } 900bbb85fb3SSatish Balay } 9018798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 902a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 903a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 904a2d1c673SSatish Balay restore the original flags */ 905a2d1c673SSatish Balay r1 = baij->roworiented; 906a2d1c673SSatish Balay r2 = a->roworiented; 907a2d1c673SSatish Balay r3 = b->roworiented; 9087c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 9097c922b88SBarry Smith a->roworiented = PETSC_FALSE; 9107c922b88SBarry Smith b->roworiented = PETSC_FALSE; 911a2d1c673SSatish Balay while (1) { 9128798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 913a2d1c673SSatish Balay if (!flg) break; 914a2d1c673SSatish Balay 915a2d1c673SSatish Balay for (i=0; i<n;) { 916a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 917a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 918a2d1c673SSatish Balay if (j < n) ncols = j-i; 919a2d1c673SSatish Balay else ncols = n-i; 920*93fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 921a2d1c673SSatish Balay i = j; 922a2d1c673SSatish Balay } 923a2d1c673SSatish Balay } 9248798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 925a2d1c673SSatish Balay baij->roworiented = r1; 926a2d1c673SSatish Balay a->roworiented = r2; 927a2d1c673SSatish Balay b->roworiented = r3; 928bbb85fb3SSatish Balay } 929bbb85fb3SSatish Balay 930bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 931bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 932bbb85fb3SSatish Balay 933bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 934bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 935bbb85fb3SSatish Balay /* 936bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 937bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 938bbb85fb3SSatish Balay */ 939bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 940bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 941bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 942bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 943bbb85fb3SSatish Balay } 944bbb85fb3SSatish Balay } 945bbb85fb3SSatish Balay 946bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 947bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 948bbb85fb3SSatish Balay } 949bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 950bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 951bbb85fb3SSatish Balay 952aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 953bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 954c38d4ed2SBarry Smith PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 955bbb85fb3SSatish Balay baij->ht_total_ct = 0; 956bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 957bbb85fb3SSatish Balay } 958bbb85fb3SSatish Balay #endif 959bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 960bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 961bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 962bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 963bbb85fb3SSatish Balay } 964bbb85fb3SSatish Balay 965606d414cSSatish Balay if (baij->rowvalues) { 966606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 967606d414cSSatish Balay baij->rowvalues = 0; 968606d414cSSatish Balay } 969bbb85fb3SSatish Balay PetscFunctionReturn(0); 970bbb85fb3SSatish Balay } 97157b952d6SSatish Balay 9725615d1e5SSatish Balay #undef __FUNC__ 973b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_Binary" 97457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 97557b952d6SSatish Balay { 97657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 97757b952d6SSatish Balay int ierr; 97857b952d6SSatish Balay 979d64ed03dSBarry Smith PetscFunctionBegin; 98057b952d6SSatish Balay if (baij->size == 1) { 98157b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 982a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 9833a40ed3dSBarry Smith PetscFunctionReturn(0); 98457b952d6SSatish Balay } 98557b952d6SSatish Balay 9865615d1e5SSatish Balay #undef __FUNC__ 987b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 9887b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 98957b952d6SSatish Balay { 99057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 99177ed5343SBarry Smith int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 9926831982aSBarry Smith PetscTruth isascii,isdraw; 99357b952d6SSatish Balay 994d64ed03dSBarry Smith PetscFunctionBegin; 9956831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 9966831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 9970f5bd95cSBarry Smith if (isascii) { 998d41123aaSBarry Smith ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 999639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 10004e220ebcSLois Curfman McInnes MatInfo info; 1001d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1002d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 10036831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 10044e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10056831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 1006d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 10076831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1008d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 10096831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 10106831982aSBarry Smith ierr = ViewerFlush(viewer);CHKERRQ(ierr); 101157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10123a40ed3dSBarry Smith PetscFunctionReturn(0); 1013d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 10146831982aSBarry Smith ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 10153a40ed3dSBarry Smith PetscFunctionReturn(0); 101657b952d6SSatish Balay } 101757b952d6SSatish Balay } 101857b952d6SSatish Balay 10190f5bd95cSBarry Smith if (isdraw) { 102057b952d6SSatish Balay Draw draw; 102157b952d6SSatish Balay PetscTruth isnull; 102277ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 10233a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 102457b952d6SSatish Balay } 102557b952d6SSatish Balay 102657b952d6SSatish Balay if (size == 1) { 102757b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1028d64ed03dSBarry Smith } else { 102957b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 103057b952d6SSatish Balay Mat A; 103157b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 10323eda8832SBarry Smith int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 10333eda8832SBarry Smith MatScalar *a; 103457b952d6SSatish Balay 103557b952d6SSatish Balay if (!rank) { 103655843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1037d64ed03dSBarry Smith } else { 103855843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 103957b952d6SSatish Balay } 104057b952d6SSatish Balay PLogObjectParent(mat,A); 104157b952d6SSatish Balay 104257b952d6SSatish Balay /* copy over the A part */ 104357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 104457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 104557b952d6SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 104657b952d6SSatish Balay 104757b952d6SSatish Balay for (i=0; i<mbs; i++) { 104857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 104957b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 105057b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 105157b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 105257b952d6SSatish Balay for (k=0; k<bs; k++) { 1053*93fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054cee3aa6bSSatish Balay col++; a += bs; 105557b952d6SSatish Balay } 105657b952d6SSatish Balay } 105757b952d6SSatish Balay } 105857b952d6SSatish Balay /* copy over the B part */ 105957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 106057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 106157b952d6SSatish Balay for (i=0; i<mbs; i++) { 106257b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 106357b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 106457b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 106557b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 106657b952d6SSatish Balay for (k=0; k<bs; k++) { 1067*93fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1068cee3aa6bSSatish Balay col++; a += bs; 106957b952d6SSatish Balay } 107057b952d6SSatish Balay } 107157b952d6SSatish Balay } 1072606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 10736d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10746d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 107555843e3eSBarry Smith /* 107655843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 107755843e3eSBarry Smith synchronized across all processors that share the Draw object 107855843e3eSBarry Smith */ 10796831982aSBarry Smith if (!rank) { 10806831982aSBarry Smith Viewer sviewer; 10816831982aSBarry Smith ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 10826831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 10836831982aSBarry Smith ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 108457b952d6SSatish Balay } 108557b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 108657b952d6SSatish Balay } 10873a40ed3dSBarry Smith PetscFunctionReturn(0); 108857b952d6SSatish Balay } 108957b952d6SSatish Balay 109057b952d6SSatish Balay 109157b952d6SSatish Balay 10925615d1e5SSatish Balay #undef __FUNC__ 1093b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1094e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 109557b952d6SSatish Balay { 109657b952d6SSatish Balay int ierr; 10976831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 109857b952d6SSatish Balay 1099d64ed03dSBarry Smith PetscFunctionBegin; 11006831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 11016831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 11026831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 11036831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 11040f5bd95cSBarry Smith if (isascii || isdraw || issocket || isbinary) { 11057b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 11065cd90555SBarry Smith } else { 11070f5bd95cSBarry Smith SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 110857b952d6SSatish Balay } 11093a40ed3dSBarry Smith PetscFunctionReturn(0); 111057b952d6SSatish Balay } 111157b952d6SSatish Balay 11125615d1e5SSatish Balay #undef __FUNC__ 1113b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1114e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 111579bdfe76SSatish Balay { 111679bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 111779bdfe76SSatish Balay int ierr; 111879bdfe76SSatish Balay 1119d64ed03dSBarry Smith PetscFunctionBegin; 112098dd23e9SBarry Smith 112198dd23e9SBarry Smith if (mat->mapping) { 112298dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 112398dd23e9SBarry Smith } 112498dd23e9SBarry Smith if (mat->bmapping) { 112598dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 112698dd23e9SBarry Smith } 112761b13de0SBarry Smith if (mat->rmap) { 112861b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 112961b13de0SBarry Smith } 113061b13de0SBarry Smith if (mat->cmap) { 113161b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 113261b13de0SBarry Smith } 1133aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1134e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 113579bdfe76SSatish Balay #endif 113679bdfe76SSatish Balay 11378798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 11388798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 11398798bf22SSatish Balay 1140606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 114179bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 114279bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1143aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 11440f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 114548e59246SSatish Balay #else 1146606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 114748e59246SSatish Balay #endif 1148606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1149606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1150606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1151606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1152606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1153606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1154606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 115579bdfe76SSatish Balay PLogObjectDestroy(mat); 115679bdfe76SSatish Balay PetscHeaderDestroy(mat); 11573a40ed3dSBarry Smith PetscFunctionReturn(0); 115879bdfe76SSatish Balay } 115979bdfe76SSatish Balay 11605615d1e5SSatish Balay #undef __FUNC__ 1161b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1162ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1163cee3aa6bSSatish Balay { 1164cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 116547b4a8eaSLois Curfman McInnes int ierr,nt; 1166cee3aa6bSSatish Balay 1167d64ed03dSBarry Smith PetscFunctionBegin; 1168e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 116947b4a8eaSLois Curfman McInnes if (nt != a->n) { 1170a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 117147b4a8eaSLois Curfman McInnes } 1172e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 117347b4a8eaSLois Curfman McInnes if (nt != a->m) { 1174a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 117547b4a8eaSLois Curfman McInnes } 117643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1177f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 117843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1179f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 118043a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 11813a40ed3dSBarry Smith PetscFunctionReturn(0); 1182cee3aa6bSSatish Balay } 1183cee3aa6bSSatish Balay 11845615d1e5SSatish Balay #undef __FUNC__ 1185b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1186ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1187cee3aa6bSSatish Balay { 1188cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1189cee3aa6bSSatish Balay int ierr; 1190d64ed03dSBarry Smith 1191d64ed03dSBarry Smith PetscFunctionBegin; 119243a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1193f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 119443a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1195f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 11963a40ed3dSBarry Smith PetscFunctionReturn(0); 1197cee3aa6bSSatish Balay } 1198cee3aa6bSSatish Balay 11995615d1e5SSatish Balay #undef __FUNC__ 1200b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 12017c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1202cee3aa6bSSatish Balay { 1203cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1204cee3aa6bSSatish Balay int ierr; 1205cee3aa6bSSatish Balay 1206d64ed03dSBarry Smith PetscFunctionBegin; 1207cee3aa6bSSatish Balay /* do nondiagonal part */ 12087c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209cee3aa6bSSatish Balay /* send it on its way */ 1210537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1211cee3aa6bSSatish Balay /* do local part */ 12127c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1213cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1214cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1215cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1216639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12173a40ed3dSBarry Smith PetscFunctionReturn(0); 1218cee3aa6bSSatish Balay } 1219cee3aa6bSSatish Balay 12205615d1e5SSatish Balay #undef __FUNC__ 1221b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 12227c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1223cee3aa6bSSatish Balay { 1224cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1225cee3aa6bSSatish Balay int ierr; 1226cee3aa6bSSatish Balay 1227d64ed03dSBarry Smith PetscFunctionBegin; 1228cee3aa6bSSatish Balay /* do nondiagonal part */ 12297c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1230cee3aa6bSSatish Balay /* send it on its way */ 1231537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1232cee3aa6bSSatish Balay /* do local part */ 12337c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1234cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1235cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1236cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1237537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12383a40ed3dSBarry Smith PetscFunctionReturn(0); 1239cee3aa6bSSatish Balay } 1240cee3aa6bSSatish Balay 1241cee3aa6bSSatish Balay /* 1242cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1243cee3aa6bSSatish Balay diagonal block 1244cee3aa6bSSatish Balay */ 12455615d1e5SSatish Balay #undef __FUNC__ 1246b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1247ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1248cee3aa6bSSatish Balay { 1249cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 12503a40ed3dSBarry Smith int ierr; 1251d64ed03dSBarry Smith 1252d64ed03dSBarry Smith PetscFunctionBegin; 1253a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 12543a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 12553a40ed3dSBarry Smith PetscFunctionReturn(0); 1256cee3aa6bSSatish Balay } 1257cee3aa6bSSatish Balay 12585615d1e5SSatish Balay #undef __FUNC__ 1259b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1260ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1261cee3aa6bSSatish Balay { 1262cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1263cee3aa6bSSatish Balay int ierr; 1264d64ed03dSBarry Smith 1265d64ed03dSBarry Smith PetscFunctionBegin; 1266cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1267cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 12683a40ed3dSBarry Smith PetscFunctionReturn(0); 1269cee3aa6bSSatish Balay } 1270026e39d0SSatish Balay 12715615d1e5SSatish Balay #undef __FUNC__ 1272b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ" 1273ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 127457b952d6SSatish Balay { 127557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1276d64ed03dSBarry Smith 1277d64ed03dSBarry Smith PetscFunctionBegin; 1278bd7f49f5SSatish Balay if (m) *m = mat->M; 1279bd7f49f5SSatish Balay if (n) *n = mat->N; 12803a40ed3dSBarry Smith PetscFunctionReturn(0); 128157b952d6SSatish Balay } 128257b952d6SSatish Balay 12835615d1e5SSatish Balay #undef __FUNC__ 1284b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ" 1285ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 128657b952d6SSatish Balay { 128757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1288d64ed03dSBarry Smith 1289d64ed03dSBarry Smith PetscFunctionBegin; 1290f830108cSBarry Smith *m = mat->m; *n = mat->n; 12913a40ed3dSBarry Smith PetscFunctionReturn(0); 129257b952d6SSatish Balay } 129357b952d6SSatish Balay 12945615d1e5SSatish Balay #undef __FUNC__ 1295b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1296ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 129757b952d6SSatish Balay { 129857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1299d64ed03dSBarry Smith 1300d64ed03dSBarry Smith PetscFunctionBegin; 1301a21fb8cbSBarry Smith if (m) *m = mat->rstart*mat->bs; 1302a21fb8cbSBarry Smith if (n) *n = mat->rend*mat->bs; 13033a40ed3dSBarry Smith PetscFunctionReturn(0); 130457b952d6SSatish Balay } 130557b952d6SSatish Balay 13065615d1e5SSatish Balay #undef __FUNC__ 1307b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1308acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1309acdf5bf4SSatish Balay { 1310acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1311acdf5bf4SSatish Balay Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1312acdf5bf4SSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1313d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1314d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1315acdf5bf4SSatish Balay 1316d64ed03dSBarry Smith PetscFunctionBegin; 1317a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1318acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1319acdf5bf4SSatish Balay 1320acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1321acdf5bf4SSatish Balay /* 1322acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1323acdf5bf4SSatish Balay */ 1324acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1325bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1326bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1327acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1328acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1329acdf5bf4SSatish Balay } 1330549d3d68SSatish Balay mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1331acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1332acdf5bf4SSatish Balay } 1333acdf5bf4SSatish Balay 1334a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1335d9d09a02SSatish Balay lrow = row - brstart; 1336acdf5bf4SSatish Balay 1337acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1338acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1339acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1340f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1341f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1342acdf5bf4SSatish Balay nztot = nzA + nzB; 1343acdf5bf4SSatish Balay 1344acdf5bf4SSatish Balay cmap = mat->garray; 1345acdf5bf4SSatish Balay if (v || idx) { 1346acdf5bf4SSatish Balay if (nztot) { 1347acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1348acdf5bf4SSatish Balay int imark = -1; 1349acdf5bf4SSatish Balay if (v) { 1350acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1351acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1352d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1353acdf5bf4SSatish Balay else break; 1354acdf5bf4SSatish Balay } 1355acdf5bf4SSatish Balay imark = i; 1356acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1357acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1358acdf5bf4SSatish Balay } 1359acdf5bf4SSatish Balay if (idx) { 1360acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1361acdf5bf4SSatish Balay if (imark > -1) { 1362acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1363bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1364acdf5bf4SSatish Balay } 1365acdf5bf4SSatish Balay } else { 1366acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1367d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1368d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1369acdf5bf4SSatish Balay else break; 1370acdf5bf4SSatish Balay } 1371acdf5bf4SSatish Balay imark = i; 1372acdf5bf4SSatish Balay } 1373d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1374d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1375acdf5bf4SSatish Balay } 1376d64ed03dSBarry Smith } else { 1377d212a18eSSatish Balay if (idx) *idx = 0; 1378d212a18eSSatish Balay if (v) *v = 0; 1379d212a18eSSatish Balay } 1380acdf5bf4SSatish Balay } 1381acdf5bf4SSatish Balay *nz = nztot; 1382f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1383f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 13843a40ed3dSBarry Smith PetscFunctionReturn(0); 1385acdf5bf4SSatish Balay } 1386acdf5bf4SSatish Balay 13875615d1e5SSatish Balay #undef __FUNC__ 1388b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1389acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1390acdf5bf4SSatish Balay { 1391acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1392d64ed03dSBarry Smith 1393d64ed03dSBarry Smith PetscFunctionBegin; 1394acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1395a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1396acdf5bf4SSatish Balay } 1397acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 13983a40ed3dSBarry Smith PetscFunctionReturn(0); 1399acdf5bf4SSatish Balay } 1400acdf5bf4SSatish Balay 14015615d1e5SSatish Balay #undef __FUNC__ 1402b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1403ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14045a838052SSatish Balay { 14055a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1406d64ed03dSBarry Smith 1407d64ed03dSBarry Smith PetscFunctionBegin; 14085a838052SSatish Balay *bs = baij->bs; 14093a40ed3dSBarry Smith PetscFunctionReturn(0); 14105a838052SSatish Balay } 14115a838052SSatish Balay 14125615d1e5SSatish Balay #undef __FUNC__ 1413b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1414ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 141558667388SSatish Balay { 141658667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 141758667388SSatish Balay int ierr; 1418d64ed03dSBarry Smith 1419d64ed03dSBarry Smith PetscFunctionBegin; 142058667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 142158667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14223a40ed3dSBarry Smith PetscFunctionReturn(0); 142358667388SSatish Balay } 14240ac07820SSatish Balay 14255615d1e5SSatish Balay #undef __FUNC__ 1426b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1427ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14280ac07820SSatish Balay { 14294e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14304e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 14317d57db60SLois Curfman McInnes int ierr; 1432329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14330ac07820SSatish Balay 1434d64ed03dSBarry Smith PetscFunctionBegin; 14354e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 14364e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14370e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1438de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14394e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14400e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1441de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14420ac07820SSatish Balay if (flag == MAT_LOCAL) { 14434e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14444e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14454e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14464e220ebcSLois Curfman McInnes info->memory = isend[3]; 14474e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14480ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1449f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 14504e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14514e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14524e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14534e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14544e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14550ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1456f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 14574e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14584e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14594e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14604e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14614e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1462d41123aaSBarry Smith } else { 1463d41123aaSBarry Smith SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 14640ac07820SSatish Balay } 14655a5d4f66SBarry Smith info->rows_global = (double)a->M; 14665a5d4f66SBarry Smith info->columns_global = (double)a->N; 14675a5d4f66SBarry Smith info->rows_local = (double)a->m; 14685a5d4f66SBarry Smith info->columns_local = (double)a->N; 14694e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14704e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14714e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14723a40ed3dSBarry Smith PetscFunctionReturn(0); 14730ac07820SSatish Balay } 14740ac07820SSatish Balay 14755615d1e5SSatish Balay #undef __FUNC__ 1476b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1477ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 147858667388SSatish Balay { 147958667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 148098305bb5SBarry Smith int ierr; 148158667388SSatish Balay 1482d64ed03dSBarry Smith PetscFunctionBegin; 148358667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 148458667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 14856da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1486c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 14874787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 14887c922b88SBarry Smith op == MAT_KEEP_ZEROED_ROWS || 14894787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 149098305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 149198305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1492b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 14937c922b88SBarry Smith a->roworiented = PETSC_TRUE; 149498305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 149598305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1496b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 14976da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 149858667388SSatish Balay op == MAT_SYMMETRIC || 149958667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1500b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 150198305bb5SBarry Smith op == MAT_USE_HASH_TABLE) { 150258667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 150398305bb5SBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 15047c922b88SBarry Smith a->roworiented = PETSC_FALSE; 150598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 150698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 15072b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 15087c922b88SBarry Smith a->donotstash = PETSC_TRUE; 1509d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1510d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1511133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 15127c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 1513d64ed03dSBarry Smith } else { 1514d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1515d64ed03dSBarry Smith } 15163a40ed3dSBarry Smith PetscFunctionReturn(0); 151758667388SSatish Balay } 151858667388SSatish Balay 15195615d1e5SSatish Balay #undef __FUNC__ 1520b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1521ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15220ac07820SSatish Balay { 15230ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15240ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15250ac07820SSatish Balay Mat B; 152640011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 15270ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15283eda8832SBarry Smith MatScalar *a; 15290ac07820SSatish Balay 1530d64ed03dSBarry Smith PetscFunctionBegin; 15317c922b88SBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 15327c922b88SBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 15330ac07820SSatish Balay 15340ac07820SSatish Balay /* copy over the A part */ 15350ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15360ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15370ac07820SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 15380ac07820SSatish Balay 15390ac07820SSatish Balay for (i=0; i<mbs; i++) { 15400ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15410ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15420ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15430ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15440ac07820SSatish Balay for (k=0; k<bs; k++) { 1545*93fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15460ac07820SSatish Balay col++; a += bs; 15470ac07820SSatish Balay } 15480ac07820SSatish Balay } 15490ac07820SSatish Balay } 15500ac07820SSatish Balay /* copy over the B part */ 15510ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 15520ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15530ac07820SSatish Balay for (i=0; i<mbs; i++) { 15540ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15550ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15560ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15570ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 15580ac07820SSatish Balay for (k=0; k<bs; k++) { 1559*93fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15600ac07820SSatish Balay col++; a += bs; 15610ac07820SSatish Balay } 15620ac07820SSatish Balay } 15630ac07820SSatish Balay } 1564606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 15650ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15660ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15670ac07820SSatish Balay 15687c922b88SBarry Smith if (matout) { 15690ac07820SSatish Balay *matout = B; 15700ac07820SSatish Balay } else { 1571f830108cSBarry Smith PetscOps *Abops; 1572cc2dc46cSBarry Smith MatOps Aops; 1573f830108cSBarry Smith 15740ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 1575606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 15760ac07820SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 15770ac07820SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1578aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 15790f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1580b1fc9764SSatish Balay #else 1581606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1582b1fc9764SSatish Balay #endif 1583606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1584606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1585606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1586606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1587f830108cSBarry Smith 1588f830108cSBarry Smith /* 1589f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1590f830108cSBarry Smith A pointers for the bops and ops but copy everything 1591f830108cSBarry Smith else from C. 1592f830108cSBarry Smith */ 1593f830108cSBarry Smith Abops = A->bops; 1594f830108cSBarry Smith Aops = A->ops; 1595549d3d68SSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1596f830108cSBarry Smith A->bops = Abops; 1597f830108cSBarry Smith A->ops = Aops; 1598f830108cSBarry Smith 15990ac07820SSatish Balay PetscHeaderDestroy(B); 16000ac07820SSatish Balay } 16013a40ed3dSBarry Smith PetscFunctionReturn(0); 16020ac07820SSatish Balay } 16030e95ebc0SSatish Balay 16045615d1e5SSatish Balay #undef __FUNC__ 1605b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 160636c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16070e95ebc0SSatish Balay { 160836c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 160936c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 16100e95ebc0SSatish Balay int ierr,s1,s2,s3; 16110e95ebc0SSatish Balay 1612d64ed03dSBarry Smith PetscFunctionBegin; 161336c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 161436c4a09eSSatish Balay if (rr) { 161536c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 161636c4a09eSSatish Balay if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 161736c4a09eSSatish Balay /* Overlap communication with computation. */ 161836c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 161936c4a09eSSatish Balay } 16200e95ebc0SSatish Balay if (ll) { 16210e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 162236c4a09eSSatish Balay if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1623a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16240e95ebc0SSatish Balay } 162536c4a09eSSatish Balay /* scale the diagonal block */ 162636c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 162736c4a09eSSatish Balay 162836c4a09eSSatish Balay if (rr) { 162936c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 163036c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1631a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 163236c4a09eSSatish Balay } 163336c4a09eSSatish Balay 16343a40ed3dSBarry Smith PetscFunctionReturn(0); 16350e95ebc0SSatish Balay } 16360e95ebc0SSatish Balay 16375615d1e5SSatish Balay #undef __FUNC__ 1638b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1639ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 16400ac07820SSatish Balay { 16410ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16420ac07820SSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1643a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 16440ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16450ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1646a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16470ac07820SSatish Balay MPI_Comm comm = A->comm; 16480ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16490ac07820SSatish Balay MPI_Status recv_status,*send_status; 16500ac07820SSatish Balay IS istmp; 16510ac07820SSatish Balay 1652d64ed03dSBarry Smith PetscFunctionBegin; 16530ac07820SSatish Balay ierr = ISGetSize(is,&N);CHKERRQ(ierr); 16540ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 16550ac07820SSatish Balay 16560ac07820SSatish Balay /* first count number of contributors to each processor */ 16570ac07820SSatish Balay nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1658549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1659549d3d68SSatish Balay procs = nprocs + size; 16600ac07820SSatish Balay owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 16610ac07820SSatish Balay for (i=0; i<N; i++) { 16620ac07820SSatish Balay idx = rows[i]; 16630ac07820SSatish Balay found = 0; 16640ac07820SSatish Balay for (j=0; j<size; j++) { 16650ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 16660ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 16670ac07820SSatish Balay } 16680ac07820SSatish Balay } 1669a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 16700ac07820SSatish Balay } 16710ac07820SSatish Balay nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 16720ac07820SSatish Balay 16730ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 16746831982aSBarry Smith work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 16756831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 16760ac07820SSatish Balay nmax = work[rank]; 16776831982aSBarry Smith nrecvs = work[size+rank]; 1678606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 16790ac07820SSatish Balay 16800ac07820SSatish Balay /* post receives: */ 1681d64ed03dSBarry Smith rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1682d64ed03dSBarry Smith recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 16830ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1684ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 16850ac07820SSatish Balay } 16860ac07820SSatish Balay 16870ac07820SSatish Balay /* do sends: 16880ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 16890ac07820SSatish Balay the ith processor 16900ac07820SSatish Balay */ 16910ac07820SSatish Balay svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1692ca161407SBarry Smith send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 16930ac07820SSatish Balay starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 16940ac07820SSatish Balay starts[0] = 0; 16950ac07820SSatish Balay for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 16960ac07820SSatish Balay for (i=0; i<N; i++) { 16970ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 16980ac07820SSatish Balay } 16996831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17000ac07820SSatish Balay 17010ac07820SSatish Balay starts[0] = 0; 17020ac07820SSatish Balay for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 17030ac07820SSatish Balay count = 0; 17040ac07820SSatish Balay for (i=0; i<size; i++) { 17050ac07820SSatish Balay if (procs[i]) { 1706ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17070ac07820SSatish Balay } 17080ac07820SSatish Balay } 1709606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17100ac07820SSatish Balay 17110ac07820SSatish Balay base = owners[rank]*bs; 17120ac07820SSatish Balay 17130ac07820SSatish Balay /* wait on receives */ 17140ac07820SSatish Balay lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 17150ac07820SSatish Balay source = lens + nrecvs; 17160ac07820SSatish Balay count = nrecvs; slen = 0; 17170ac07820SSatish Balay while (count) { 1718ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17190ac07820SSatish Balay /* unpack receives into our local space */ 1720ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17210ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17220ac07820SSatish Balay lens[imdex] = n; 17230ac07820SSatish Balay slen += n; 17240ac07820SSatish Balay count--; 17250ac07820SSatish Balay } 1726606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17270ac07820SSatish Balay 17280ac07820SSatish Balay /* move the data into the send scatter */ 17290ac07820SSatish Balay lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 17300ac07820SSatish Balay count = 0; 17310ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17320ac07820SSatish Balay values = rvalues + i*nmax; 17330ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17340ac07820SSatish Balay lrows[count++] = values[j] - base; 17350ac07820SSatish Balay } 17360ac07820SSatish Balay } 1737606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1738606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1739606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1740606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17410ac07820SSatish Balay 17420ac07820SSatish Balay /* actually zap the local rows */ 1743029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 17440ac07820SSatish Balay PLogObjectParent(A,istmp); 1745a07cd24cSSatish Balay 174672dacd9aSBarry Smith /* 174772dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 174872dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 174972dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 175072dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 175172dacd9aSBarry Smith 175272dacd9aSBarry Smith Contributed by: Mathew Knepley 175372dacd9aSBarry Smith */ 17549c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 17553eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr); 17569c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 17573eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr); 17589c957beeSSatish Balay } else if (diag) { 17593eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1760fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1761fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1762fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 17636525c446SSatish Balay } 1764a07cd24cSSatish Balay for (i = 0; i < slen; i++) { 1765a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 17663eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1767a07cd24cSSatish Balay } 1768a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1769a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17709c957beeSSatish Balay } else { 17713eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1772a07cd24cSSatish Balay } 17739c957beeSSatish Balay 17749c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1775606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1776a07cd24cSSatish Balay 17770ac07820SSatish Balay /* wait on sends */ 17780ac07820SSatish Balay if (nsends) { 1779d64ed03dSBarry Smith send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1780ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1781606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 17820ac07820SSatish Balay } 1783606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1784606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 17850ac07820SSatish Balay 17863a40ed3dSBarry Smith PetscFunctionReturn(0); 17870ac07820SSatish Balay } 178872dacd9aSBarry Smith 17895615d1e5SSatish Balay #undef __FUNC__ 1790b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1791ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1792ba4ca20aSSatish Balay { 1793ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 179425fdafccSSatish Balay MPI_Comm comm = A->comm; 1795133cdb44SSatish Balay static int called = 0; 17963a40ed3dSBarry Smith int ierr; 1797ba4ca20aSSatish Balay 1798d64ed03dSBarry Smith PetscFunctionBegin; 17993a40ed3dSBarry Smith if (!a->rank) { 18003a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 180125fdafccSSatish Balay } 180225fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1803d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1804d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18053a40ed3dSBarry Smith PetscFunctionReturn(0); 1806ba4ca20aSSatish Balay } 18070ac07820SSatish Balay 18085615d1e5SSatish Balay #undef __FUNC__ 1809b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1810ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1811bb5a7306SBarry Smith { 1812bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1813bb5a7306SBarry Smith int ierr; 1814d64ed03dSBarry Smith 1815d64ed03dSBarry Smith PetscFunctionBegin; 1816bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18173a40ed3dSBarry Smith PetscFunctionReturn(0); 1818bb5a7306SBarry Smith } 1819bb5a7306SBarry Smith 18202e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18210ac07820SSatish Balay 18227fc3c18eSBarry Smith #undef __FUNC__ 1823b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 18247fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18257fc3c18eSBarry Smith { 18267fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18277fc3c18eSBarry Smith Mat a,b,c,d; 18287fc3c18eSBarry Smith PetscTruth flg; 18297fc3c18eSBarry Smith int ierr; 18307fc3c18eSBarry Smith 18317fc3c18eSBarry Smith PetscFunctionBegin; 18327fc3c18eSBarry Smith if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 18337fc3c18eSBarry Smith a = matA->A; b = matA->B; 18347fc3c18eSBarry Smith c = matB->A; d = matB->B; 18357fc3c18eSBarry Smith 18367fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18377fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18387fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18397fc3c18eSBarry Smith } 18407fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18417fc3c18eSBarry Smith PetscFunctionReturn(0); 18427fc3c18eSBarry Smith } 18437fc3c18eSBarry Smith 184479bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1845cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1846cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1847cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1848cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1849cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1850cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 18517c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 18527c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1853cc2dc46cSBarry Smith 0, 1854cc2dc46cSBarry Smith 0, 1855cc2dc46cSBarry Smith 0, 1856cc2dc46cSBarry Smith 0, 1857cc2dc46cSBarry Smith 0, 1858cc2dc46cSBarry Smith 0, 1859cc2dc46cSBarry Smith 0, 1860cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1861cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 18627fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1863cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1864cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1865cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1866cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1867cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1868cc2dc46cSBarry Smith 0, 1869cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1870cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1871cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1872cc2dc46cSBarry Smith 0, 1873cc2dc46cSBarry Smith 0, 1874cc2dc46cSBarry Smith 0, 1875cc2dc46cSBarry Smith 0, 1876cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1877cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1878cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1879cc2dc46cSBarry Smith 0, 1880cc2dc46cSBarry Smith 0, 1881cc2dc46cSBarry Smith 0, 1882cc2dc46cSBarry Smith 0, 18832e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1884cc2dc46cSBarry Smith 0, 1885cc2dc46cSBarry Smith 0, 1886cc2dc46cSBarry Smith 0, 1887cc2dc46cSBarry Smith 0, 1888cc2dc46cSBarry Smith 0, 1889cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1890cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1891cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1892cc2dc46cSBarry Smith 0, 1893cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1894cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1895cc2dc46cSBarry Smith 0, 1896cc2dc46cSBarry Smith 0, 1897cc2dc46cSBarry Smith 0, 1898cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1899cc2dc46cSBarry Smith 0, 1900cc2dc46cSBarry Smith 0, 1901cc2dc46cSBarry Smith 0, 1902cc2dc46cSBarry Smith 0, 1903cc2dc46cSBarry Smith 0, 1904cc2dc46cSBarry Smith 0, 1905cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1906cc2dc46cSBarry Smith 0, 1907cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1908cc2dc46cSBarry Smith 0, 1909cc2dc46cSBarry Smith 0, 1910cc2dc46cSBarry Smith 0, 1911cc2dc46cSBarry Smith MatGetMaps_Petsc}; 191279bdfe76SSatish Balay 19135ef9f2a5SBarry Smith 1914e18c124aSSatish Balay EXTERN_C_BEGIN 19155ef9f2a5SBarry Smith #undef __FUNC__ 1916b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 19175ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19185ef9f2a5SBarry Smith { 19195ef9f2a5SBarry Smith PetscFunctionBegin; 19205ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19215ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19225ef9f2a5SBarry Smith PetscFunctionReturn(0); 19235ef9f2a5SBarry Smith } 1924e18c124aSSatish Balay EXTERN_C_END 192579bdfe76SSatish Balay 19265615d1e5SSatish Balay #undef __FUNC__ 1927b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 192879bdfe76SSatish Balay /*@C 192979bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 193079bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 193179bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 193279bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 193379bdfe76SSatish Balay performance can be increased by more than a factor of 50. 193479bdfe76SSatish Balay 1935db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1936db81eaa0SLois Curfman McInnes 193779bdfe76SSatish Balay Input Parameters: 1938db81eaa0SLois Curfman McInnes + comm - MPI communicator 193979bdfe76SSatish Balay . bs - size of blockk 194079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 194192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 194292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 194392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 194492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 194592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1946be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1947be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 194879bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 194979bdfe76SSatish Balay submatrix (same for all local rows) 19507fc3c18eSBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 195192e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 1952db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 195392e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 195479bdfe76SSatish Balay submatrix (same for all local rows). 19557fc3c18eSBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 195692e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 195792e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 195879bdfe76SSatish Balay 195979bdfe76SSatish Balay Output Parameter: 196079bdfe76SSatish Balay . A - the matrix 196179bdfe76SSatish Balay 1962db81eaa0SLois Curfman McInnes Options Database Keys: 1963db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 1964db81eaa0SLois Curfman McInnes block calculations (much slower) 1965db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 1966494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1967494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 19683ffaccefSLois Curfman McInnes 1969b259b22eSLois Curfman McInnes Notes: 197079bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 197179bdfe76SSatish Balay (possibly both). 197279bdfe76SSatish Balay 1973be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1974be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 1975be79a94dSBarry Smith 197679bdfe76SSatish Balay Storage Information: 197779bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 197879bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 197979bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 198079bdfe76SSatish Balay local matrix (a rectangular submatrix). 198179bdfe76SSatish Balay 198279bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 198379bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 198479bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 198579bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 198679bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 198779bdfe76SSatish Balay 198879bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 198979bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 199079bdfe76SSatish Balay 1991db81eaa0SLois Curfman McInnes .vb 1992db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 1993db81eaa0SLois Curfman McInnes ------------------- 1994db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 1995db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 1996db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 1997db81eaa0SLois Curfman McInnes ------------------- 1998db81eaa0SLois Curfman McInnes .ve 199979bdfe76SSatish Balay 200079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 200179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 200279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 200357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 200479bdfe76SSatish Balay 2005d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2006d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 200779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 200892e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 200992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 20106da5968aSLois Curfman McInnes matrices. 201179bdfe76SSatish Balay 2012027ccd11SLois Curfman McInnes Level: intermediate 2013027ccd11SLois Curfman McInnes 201492e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 201579bdfe76SSatish Balay 20163eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 201779bdfe76SSatish Balay @*/ 2018329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 201979bdfe76SSatish Balay { 202079bdfe76SSatish Balay Mat B; 202179bdfe76SSatish Balay Mat_MPIBAIJ *b; 2022f1af5d2fSBarry Smith int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2023f1af5d2fSBarry Smith PetscTruth flag1,flag2,flg; 202479bdfe76SSatish Balay 2025d64ed03dSBarry Smith PetscFunctionBegin; 2026962fb4a0SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2027962fb4a0SBarry Smith 2028a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 202936c4a09eSSatish Balay if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 203036c4a09eSSatish Balay if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 20314fdb0a08SBarry Smith if (d_nnz) { 203236c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 20334fdb0a08SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 20344fdb0a08SBarry Smith } 20354fdb0a08SBarry Smith } 20364fdb0a08SBarry Smith if (o_nnz) { 203736c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 20384fdb0a08SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 20394fdb0a08SBarry Smith } 20404fdb0a08SBarry Smith } 20413914022bSBarry Smith 2042d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2043494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2044494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2045494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 20463914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 20473914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 20483914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 20493a40ed3dSBarry Smith PetscFunctionReturn(0); 20503914022bSBarry Smith } 20513914022bSBarry Smith 205279bdfe76SSatish Balay *A = 0; 20533f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 205479bdfe76SSatish Balay PLogObjectCreate(B); 205579bdfe76SSatish Balay B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2056549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2057549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 20584c50302cSBarry Smith 2059e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 2060e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 206190f02eecSBarry Smith B->mapping = 0; 206279bdfe76SSatish Balay B->factor = 0; 206379bdfe76SSatish Balay B->assembled = PETSC_FALSE; 206479bdfe76SSatish Balay 2065e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 2066d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2067d132466eSBarry Smith ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 206879bdfe76SSatish Balay 20697c922b88SBarry Smith if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2070a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2071d64ed03dSBarry Smith } 2072a8c6a408SBarry Smith if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2073a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2074a8c6a408SBarry Smith } 2075a8c6a408SBarry Smith if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2076a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2077a8c6a408SBarry Smith } 2078cee3aa6bSSatish Balay if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2079cee3aa6bSSatish Balay if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 208079bdfe76SSatish Balay 208179bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 208279bdfe76SSatish Balay work[0] = m; work[1] = n; 208379bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 2084ca161407SBarry Smith ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 208579bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 208679bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 208779bdfe76SSatish Balay } 208879bdfe76SSatish Balay if (m == PETSC_DECIDE) { 208979bdfe76SSatish Balay Mbs = M/bs; 2090a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 209179bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 209279bdfe76SSatish Balay m = mbs*bs; 209379bdfe76SSatish Balay } 209479bdfe76SSatish Balay if (n == PETSC_DECIDE) { 209579bdfe76SSatish Balay Nbs = N/bs; 2096a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 209779bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 209879bdfe76SSatish Balay n = nbs*bs; 209979bdfe76SSatish Balay } 2100a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 2101a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2102a8c6a408SBarry Smith } 210379bdfe76SSatish Balay 210479bdfe76SSatish Balay b->m = m; B->m = m; 210579bdfe76SSatish Balay b->n = n; B->n = n; 210679bdfe76SSatish Balay b->N = N; B->N = N; 210779bdfe76SSatish Balay b->M = M; B->M = M; 210879bdfe76SSatish Balay b->bs = bs; 210979bdfe76SSatish Balay b->bs2 = bs*bs; 211079bdfe76SSatish Balay b->mbs = mbs; 211179bdfe76SSatish Balay b->nbs = nbs; 211279bdfe76SSatish Balay b->Mbs = Mbs; 211379bdfe76SSatish Balay b->Nbs = Nbs; 211479bdfe76SSatish Balay 2115c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2116c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 2117488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2118488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2119c7fcc2eaSBarry Smith 212079bdfe76SSatish Balay /* build local table of row and column ownerships */ 2121ff2fd236SBarry Smith b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2122ff2fd236SBarry Smith PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 21230ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2124ff2fd236SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2125ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 212679bdfe76SSatish Balay b->rowners[0] = 0; 212779bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 212879bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 212979bdfe76SSatish Balay } 2130ff2fd236SBarry Smith for (i=0; i<=b->size; i++) { 2131ff2fd236SBarry Smith b->rowners_bs[i] = b->rowners[i]*bs; 2132ff2fd236SBarry Smith } 213379bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 213479bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 21354fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 21364fa0d573SSatish Balay b->rend_bs = b->rend * bs; 21374fa0d573SSatish Balay 2138ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 213979bdfe76SSatish Balay b->cowners[0] = 0; 214079bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 214179bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 214279bdfe76SSatish Balay } 214379bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 214479bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 21454fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 21464fa0d573SSatish Balay b->cend_bs = b->cend * bs; 214779bdfe76SSatish Balay 2148a07cd24cSSatish Balay 214979bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2150029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 215179bdfe76SSatish Balay PLogObjectParent(B,b->A); 215279bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2153029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 215479bdfe76SSatish Balay PLogObjectParent(B,b->B); 215579bdfe76SSatish Balay 215679bdfe76SSatish Balay /* build cache for off array entries formed */ 21578798bf22SSatish Balay ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 21588798bf22SSatish Balay ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 21597c922b88SBarry Smith b->donotstash = PETSC_FALSE; 21607c922b88SBarry Smith b->colmap = PETSC_NULL; 21617c922b88SBarry Smith b->garray = PETSC_NULL; 21627c922b88SBarry Smith b->roworiented = PETSC_TRUE; 216379bdfe76SSatish Balay 216430793edcSSatish Balay /* stuff used in block assembly */ 216530793edcSSatish Balay b->barray = 0; 216630793edcSSatish Balay 216779bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 216879bdfe76SSatish Balay b->lvec = 0; 216979bdfe76SSatish Balay b->Mvctx = 0; 217079bdfe76SSatish Balay 217179bdfe76SSatish Balay /* stuff for MatGetRow() */ 217279bdfe76SSatish Balay b->rowindices = 0; 217379bdfe76SSatish Balay b->rowvalues = 0; 217479bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 217579bdfe76SSatish Balay 2176a07cd24cSSatish Balay /* hash table stuff */ 2177a07cd24cSSatish Balay b->ht = 0; 2178187ce0cbSSatish Balay b->hd = 0; 21790bdbc534SSatish Balay b->ht_size = 0; 21807c922b88SBarry Smith b->ht_flag = PETSC_FALSE; 218125fdafccSSatish Balay b->ht_fact = 0; 2182187ce0cbSSatish Balay b->ht_total_ct = 0; 2183187ce0cbSSatish Balay b->ht_insert_ct = 0; 2184a07cd24cSSatish Balay 218579bdfe76SSatish Balay *A = B; 2186133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2187133cdb44SSatish Balay if (flg) { 2188133cdb44SSatish Balay double fact = 1.39; 2189133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2190f1af5d2fSBarry Smith ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2191133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2192133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2193133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2194133cdb44SSatish Balay } 2195f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 21967fc3c18eSBarry Smith "MatStoreValues_MPIBAIJ", 21977fc3c18eSBarry Smith (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2198f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 21997fc3c18eSBarry Smith "MatRetrieveValues_MPIBAIJ", 22007fc3c18eSBarry Smith (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2201f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 22025ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 22035ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 22043a40ed3dSBarry Smith PetscFunctionReturn(0); 220579bdfe76SSatish Balay } 2206026e39d0SSatish Balay 22075615d1e5SSatish Balay #undef __FUNC__ 2208b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 22092e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 22100ac07820SSatish Balay { 22110ac07820SSatish Balay Mat mat; 22120ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2213f1af5d2fSBarry Smith int ierr,len=0; 2214f1af5d2fSBarry Smith PetscTruth flg; 22150ac07820SSatish Balay 2216d64ed03dSBarry Smith PetscFunctionBegin; 22170ac07820SSatish Balay *newmat = 0; 22183f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 22190ac07820SSatish Balay PLogObjectCreate(mat); 22200ac07820SSatish Balay mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2221549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2222e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2223e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 22240ac07820SSatish Balay mat->factor = matin->factor; 22250ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2226aef5e8e0SSatish Balay mat->insertmode = NOT_SET_VALUES; 22270ac07820SSatish Balay 22280ac07820SSatish Balay a->m = mat->m = oldmat->m; 22290ac07820SSatish Balay a->n = mat->n = oldmat->n; 22300ac07820SSatish Balay a->M = mat->M = oldmat->M; 22310ac07820SSatish Balay a->N = mat->N = oldmat->N; 22320ac07820SSatish Balay 22330ac07820SSatish Balay a->bs = oldmat->bs; 22340ac07820SSatish Balay a->bs2 = oldmat->bs2; 22350ac07820SSatish Balay a->mbs = oldmat->mbs; 22360ac07820SSatish Balay a->nbs = oldmat->nbs; 22370ac07820SSatish Balay a->Mbs = oldmat->Mbs; 22380ac07820SSatish Balay a->Nbs = oldmat->Nbs; 22390ac07820SSatish Balay 22400ac07820SSatish Balay a->rstart = oldmat->rstart; 22410ac07820SSatish Balay a->rend = oldmat->rend; 22420ac07820SSatish Balay a->cstart = oldmat->cstart; 22430ac07820SSatish Balay a->cend = oldmat->cend; 22440ac07820SSatish Balay a->size = oldmat->size; 22450ac07820SSatish Balay a->rank = oldmat->rank; 2246aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2247aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2248aef5e8e0SSatish Balay a->rowindices = 0; 22490ac07820SSatish Balay a->rowvalues = 0; 22500ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 225130793edcSSatish Balay a->barray = 0; 22523123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 22533123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 22543123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 22553123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 22560ac07820SSatish Balay 2257133cdb44SSatish Balay /* hash table stuff */ 2258133cdb44SSatish Balay a->ht = 0; 2259133cdb44SSatish Balay a->hd = 0; 2260133cdb44SSatish Balay a->ht_size = 0; 2261133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 226225fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2263133cdb44SSatish Balay a->ht_total_ct = 0; 2264133cdb44SSatish Balay a->ht_insert_ct = 0; 2265133cdb44SSatish Balay 2266133cdb44SSatish Balay 2267ff2fd236SBarry Smith a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2268ff2fd236SBarry Smith PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 22690ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 2270ff2fd236SBarry Smith a->rowners_bs = a->cowners + a->size + 2; 2271549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 22728798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 22738798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 22740ac07820SSatish Balay if (oldmat->colmap) { 2275aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 22760f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 227748e59246SSatish Balay #else 22780ac07820SSatish Balay a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 22790ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2280549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 228148e59246SSatish Balay #endif 22820ac07820SSatish Balay } else a->colmap = 0; 22830ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 22840ac07820SSatish Balay a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 22850ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 2286549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 22870ac07820SSatish Balay } else a->garray = 0; 22880ac07820SSatish Balay 22890ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 22900ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 22910ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2292e18c124aSSatish Balay 22930ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 22942e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 22950ac07820SSatish Balay PLogObjectParent(mat,a->A); 22962e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 22970ac07820SSatish Balay PLogObjectParent(mat,a->B); 22980ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2299e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 23000ac07820SSatish Balay if (flg) { 23010ac07820SSatish Balay ierr = MatPrintHelp(mat);CHKERRQ(ierr); 23020ac07820SSatish Balay } 23030ac07820SSatish Balay *newmat = mat; 23043a40ed3dSBarry Smith PetscFunctionReturn(0); 23050ac07820SSatish Balay } 230657b952d6SSatish Balay 230757b952d6SSatish Balay #include "sys.h" 230857b952d6SSatish Balay 23095615d1e5SSatish Balay #undef __FUNC__ 2310b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 231157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 231257b952d6SSatish Balay { 231357b952d6SSatish Balay Mat A; 231457b952d6SSatish Balay int i,nz,ierr,j,rstart,rend,fd; 231557b952d6SSatish Balay Scalar *vals,*buf; 231657b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 231757b952d6SSatish Balay MPI_Status status; 2318cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 231957b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2320f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 232157b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 232257b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 232357b952d6SSatish Balay 2324d64ed03dSBarry Smith PetscFunctionBegin; 2325f1af5d2fSBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 232657b952d6SSatish Balay 2327d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2328d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 232957b952d6SSatish Balay if (!rank) { 233057b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2331e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2332a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2333d64ed03dSBarry Smith if (header[3] < 0) { 2334a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2335d64ed03dSBarry Smith } 23366c5fab8fSBarry Smith } 2337d64ed03dSBarry Smith 2338ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 233957b952d6SSatish Balay M = header[1]; N = header[2]; 234057b952d6SSatish Balay 2341a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 234257b952d6SSatish Balay 234357b952d6SSatish Balay /* 234457b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 234557b952d6SSatish Balay divisible by the blocksize 234657b952d6SSatish Balay */ 234757b952d6SSatish Balay Mbs = M/bs; 234857b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 234957b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 235057b952d6SSatish Balay else Mbs++; 235157b952d6SSatish Balay if (extra_rows &&!rank) { 2352b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 235357b952d6SSatish Balay } 2354537820f0SBarry Smith 235557b952d6SSatish Balay /* determine ownership of all rows */ 235657b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 235757b952d6SSatish Balay m = mbs * bs; 2358cee3aa6bSSatish Balay rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2359cee3aa6bSSatish Balay browners = rowners + size + 1; 2360ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 236157b952d6SSatish Balay rowners[0] = 0; 2362cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2363cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 236457b952d6SSatish Balay rstart = rowners[rank]; 236557b952d6SSatish Balay rend = rowners[rank+1]; 236657b952d6SSatish Balay 236757b952d6SSatish Balay /* distribute row lengths to all processors */ 236857b952d6SSatish Balay locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 236957b952d6SSatish Balay if (!rank) { 237057b952d6SSatish Balay rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2371e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 237257b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 237357b952d6SSatish Balay sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2374cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2375ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2376606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2377d64ed03dSBarry Smith } else { 2378ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 237957b952d6SSatish Balay } 238057b952d6SSatish Balay 238157b952d6SSatish Balay if (!rank) { 238257b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 238357b952d6SSatish Balay procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2384549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 238557b952d6SSatish Balay for (i=0; i<size; i++) { 238657b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 238757b952d6SSatish Balay procsnz[i] += rowlengths[j]; 238857b952d6SSatish Balay } 238957b952d6SSatish Balay } 2390606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 239157b952d6SSatish Balay 239257b952d6SSatish Balay /* determine max buffer needed and allocate it */ 239357b952d6SSatish Balay maxnz = 0; 239457b952d6SSatish Balay for (i=0; i<size; i++) { 239557b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 239657b952d6SSatish Balay } 239757b952d6SSatish Balay cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 239857b952d6SSatish Balay 239957b952d6SSatish Balay /* read in my part of the matrix column indices */ 240057b952d6SSatish Balay nz = procsnz[0]; 240157b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 240257b952d6SSatish Balay mycols = ibuf; 2403cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2404e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2405cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2406cee3aa6bSSatish Balay 240757b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 240857b952d6SSatish Balay for (i=1; i<size-1; i++) { 240957b952d6SSatish Balay nz = procsnz[i]; 2410e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2411ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 241257b952d6SSatish Balay } 241357b952d6SSatish Balay /* read in the stuff for the last proc */ 241457b952d6SSatish Balay if (size != 1) { 241557b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2416e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 241757b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2418ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 241957b952d6SSatish Balay } 2420606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2421d64ed03dSBarry Smith } else { 242257b952d6SSatish Balay /* determine buffer space needed for message */ 242357b952d6SSatish Balay nz = 0; 242457b952d6SSatish Balay for (i=0; i<m; i++) { 242557b952d6SSatish Balay nz += locrowlens[i]; 242657b952d6SSatish Balay } 242757b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 242857b952d6SSatish Balay mycols = ibuf; 242957b952d6SSatish Balay /* receive message of column indices*/ 2430ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2431ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2432a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 243357b952d6SSatish Balay } 243457b952d6SSatish Balay 243557b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2436cee3aa6bSSatish Balay dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2437cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 243857b952d6SSatish Balay mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2439549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 244057b952d6SSatish Balay masked1 = mask + Mbs; 244157b952d6SSatish Balay masked2 = masked1 + Mbs; 244257b952d6SSatish Balay rowcount = 0; nzcount = 0; 244357b952d6SSatish Balay for (i=0; i<mbs; i++) { 244457b952d6SSatish Balay dcount = 0; 244557b952d6SSatish Balay odcount = 0; 244657b952d6SSatish Balay for (j=0; j<bs; j++) { 244757b952d6SSatish Balay kmax = locrowlens[rowcount]; 244857b952d6SSatish Balay for (k=0; k<kmax; k++) { 244957b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 245057b952d6SSatish Balay if (!mask[tmp]) { 245157b952d6SSatish Balay mask[tmp] = 1; 245257b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 245357b952d6SSatish Balay else masked1[dcount++] = tmp; 245457b952d6SSatish Balay } 245557b952d6SSatish Balay } 245657b952d6SSatish Balay rowcount++; 245757b952d6SSatish Balay } 2458cee3aa6bSSatish Balay 245957b952d6SSatish Balay dlens[i] = dcount; 246057b952d6SSatish Balay odlens[i] = odcount; 2461cee3aa6bSSatish Balay 246257b952d6SSatish Balay /* zero out the mask elements we set */ 246357b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 246457b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 246557b952d6SSatish Balay } 2466cee3aa6bSSatish Balay 246757b952d6SSatish Balay /* create our matrix */ 2468549d3d68SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 246957b952d6SSatish Balay A = *newmat; 24706d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 247157b952d6SSatish Balay 247257b952d6SSatish Balay if (!rank) { 247357b952d6SSatish Balay buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 247457b952d6SSatish Balay /* read in my part of the matrix numerical values */ 247557b952d6SSatish Balay nz = procsnz[0]; 247657b952d6SSatish Balay vals = buf; 2477cee3aa6bSSatish Balay mycols = ibuf; 2478cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2479e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2480cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2481537820f0SBarry Smith 248257b952d6SSatish Balay /* insert into matrix */ 248357b952d6SSatish Balay jj = rstart*bs; 248457b952d6SSatish Balay for (i=0; i<m; i++) { 24853eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 248657b952d6SSatish Balay mycols += locrowlens[i]; 248757b952d6SSatish Balay vals += locrowlens[i]; 248857b952d6SSatish Balay jj++; 248957b952d6SSatish Balay } 249057b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 249157b952d6SSatish Balay for (i=1; i<size-1; i++) { 249257b952d6SSatish Balay nz = procsnz[i]; 249357b952d6SSatish Balay vals = buf; 2494e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2495ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 249657b952d6SSatish Balay } 249757b952d6SSatish Balay /* the last proc */ 249857b952d6SSatish Balay if (size != 1){ 249957b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2500cee3aa6bSSatish Balay vals = buf; 2501e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 250257b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2503ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 250457b952d6SSatish Balay } 2505606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2506d64ed03dSBarry Smith } else { 250757b952d6SSatish Balay /* receive numeric values */ 250857b952d6SSatish Balay buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 250957b952d6SSatish Balay 251057b952d6SSatish Balay /* receive message of values*/ 251157b952d6SSatish Balay vals = buf; 2512cee3aa6bSSatish Balay mycols = ibuf; 2513ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2514ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2515a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 251657b952d6SSatish Balay 251757b952d6SSatish Balay /* insert into matrix */ 251857b952d6SSatish Balay jj = rstart*bs; 2519cee3aa6bSSatish Balay for (i=0; i<m; i++) { 25203eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 252157b952d6SSatish Balay mycols += locrowlens[i]; 252257b952d6SSatish Balay vals += locrowlens[i]; 252357b952d6SSatish Balay jj++; 252457b952d6SSatish Balay } 252557b952d6SSatish Balay } 2526606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2527606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2528606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2529606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2530606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2531606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 25326d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25336d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25343a40ed3dSBarry Smith PetscFunctionReturn(0); 253557b952d6SSatish Balay } 253657b952d6SSatish Balay 2537133cdb44SSatish Balay #undef __FUNC__ 2538b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2539133cdb44SSatish Balay /*@ 2540133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2541133cdb44SSatish Balay 2542133cdb44SSatish Balay Input Parameters: 2543133cdb44SSatish Balay . mat - the matrix 2544133cdb44SSatish Balay . fact - factor 2545133cdb44SSatish Balay 2546fee21e36SBarry Smith Collective on Mat 2547fee21e36SBarry Smith 25488c890885SBarry Smith Level: advanced 25498c890885SBarry Smith 2550133cdb44SSatish Balay Notes: 2551133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2552133cdb44SSatish Balay 2553133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2554133cdb44SSatish Balay 2555133cdb44SSatish Balay .seealso: MatSetOption() 2556133cdb44SSatish Balay @*/ 2557329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2558133cdb44SSatish Balay { 255925fdafccSSatish Balay Mat_MPIBAIJ *baij; 2560133cdb44SSatish Balay 2561133cdb44SSatish Balay PetscFunctionBegin; 2562133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 256325fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2564133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2565133cdb44SSatish Balay } 2566133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2567133cdb44SSatish Balay baij->ht_fact = fact; 2568133cdb44SSatish Balay PetscFunctionReturn(0); 2569133cdb44SSatish Balay } 2570