xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision aa95bbe85be7e7130f9d5a4d5756fc8660e8a0c6)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
279bdfe76SSatish Balay 
3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
479bdfe76SSatish Balay 
5dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
1593fea6afSBarry Smith 
1693fea6afSBarry Smith /*  UGLY, ugly, ugly
1787828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
1893fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
1993fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2093fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2193fea6afSBarry Smith    into the single precision data structures.
2293fea6afSBarry Smith */
2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
24b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
2993fea6afSBarry Smith #else
306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3593fea6afSBarry Smith #endif
363b2fbd54SBarry Smith 
374a2ae208SSatish Balay #undef __FUNCT__
38985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
39985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
407843d17aSBarry Smith {
417843d17aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42dfbe8321SBarry Smith   PetscErrorCode ierr;
43985db425SBarry Smith   PetscInt       i,*idxb = 0;
4487828ca2SBarry Smith   PetscScalar    *va,*vb;
457843d17aSBarry Smith   Vec            vtmp;
467843d17aSBarry Smith 
477843d17aSBarry Smith   PetscFunctionBegin;
487843d17aSBarry Smith 
49985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
501ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51985db425SBarry Smith   if (idx) {
52ff6e566aSBarry Smith     for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
53985db425SBarry Smith   }
547843d17aSBarry Smith 
55899cda47SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
56985db425SBarry Smith   if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
57985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
581ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
597843d17aSBarry Smith 
60899cda47SBarry Smith   for (i=0; i<A->rmap.n; i++){
61985db425SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
627843d17aSBarry Smith   }
637843d17aSBarry Smith 
641ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
651ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
66985db425SBarry Smith   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
67ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
687843d17aSBarry Smith   PetscFunctionReturn(0);
697843d17aSBarry Smith }
707843d17aSBarry Smith 
717fc3c18eSBarry Smith EXTERN_C_BEGIN
724a2ae208SSatish Balay #undef __FUNCT__
734a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
74be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
757fc3c18eSBarry Smith {
767fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
77dfbe8321SBarry Smith   PetscErrorCode ierr;
787fc3c18eSBarry Smith 
797fc3c18eSBarry Smith   PetscFunctionBegin;
807fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
817fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
827fc3c18eSBarry Smith   PetscFunctionReturn(0);
837fc3c18eSBarry Smith }
847fc3c18eSBarry Smith EXTERN_C_END
857fc3c18eSBarry Smith 
867fc3c18eSBarry Smith EXTERN_C_BEGIN
874a2ae208SSatish Balay #undef __FUNCT__
884a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
89be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
907fc3c18eSBarry Smith {
917fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
92dfbe8321SBarry Smith   PetscErrorCode ierr;
937fc3c18eSBarry Smith 
947fc3c18eSBarry Smith   PetscFunctionBegin;
957fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
967fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
977fc3c18eSBarry Smith   PetscFunctionReturn(0);
987fc3c18eSBarry Smith }
997fc3c18eSBarry Smith EXTERN_C_END
1007fc3c18eSBarry Smith 
101537820f0SBarry Smith /*
102537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
10357b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10457b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10557b952d6SSatish Balay    length of colmap equals the global matrix length.
10657b952d6SSatish Balay */
1074a2ae208SSatish Balay #undef __FUNCT__
1084a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
109dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
11057b952d6SSatish Balay {
11157b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
11257b952d6SSatish Balay   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
1136849ba73SBarry Smith   PetscErrorCode ierr;
114899cda47SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;
11557b952d6SSatish Balay 
116d64ed03dSBarry Smith   PetscFunctionBegin;
117aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
118f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11948e59246SSatish Balay   for (i=0; i<nbs; i++){
1200f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
12148e59246SSatish Balay   }
12248e59246SSatish Balay #else
123b24ad042SBarry Smith   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
12452e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
125b24ad042SBarry Smith   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
126928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12748e59246SSatish Balay #endif
1283a40ed3dSBarry Smith   PetscFunctionReturn(0);
12957b952d6SSatish Balay }
13057b952d6SSatish Balay 
13180c1aa95SSatish Balay #define CHUNKSIZE  10
13280c1aa95SSatish Balay 
133f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13480c1aa95SSatish Balay { \
13580c1aa95SSatish Balay  \
13680c1aa95SSatish Balay     brow = row/bs;  \
13780c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
138ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13980c1aa95SSatish Balay       bcol = col/bs; \
14080c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
141ab26458aSBarry Smith       low = 0; high = nrow; \
142ab26458aSBarry Smith       while (high-low > 3) { \
143ab26458aSBarry Smith         t = (low+high)/2; \
144ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
145ab26458aSBarry Smith         else              low  = t; \
146ab26458aSBarry Smith       } \
147ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14880c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14980c1aa95SSatish Balay         if (rp[_i] == bcol) { \
15080c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
151eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
152eada6651SSatish Balay           else                    *bap  = value;  \
153ac7a638eSSatish Balay           goto a_noinsert; \
15480c1aa95SSatish Balay         } \
15580c1aa95SSatish Balay       } \
15689280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
157085a36d4SBarry Smith       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
158421e10b8SBarry Smith       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
15980c1aa95SSatish Balay       N = nrow++ - 1;  \
16080c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
16180c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
16280c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1633eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
16480c1aa95SSatish Balay       } \
1653eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
16680c1aa95SSatish Balay       rp[_i]                      = bcol;  \
16780c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
168ac7a638eSSatish Balay       a_noinsert:; \
16980c1aa95SSatish Balay     ailen[brow] = nrow; \
17080c1aa95SSatish Balay }
17157b952d6SSatish Balay 
172ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
173ac7a638eSSatish Balay { \
174ac7a638eSSatish Balay     brow = row/bs;  \
175ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
176ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
177ac7a638eSSatish Balay       bcol = col/bs; \
178ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
179ac7a638eSSatish Balay       low = 0; high = nrow; \
180ac7a638eSSatish Balay       while (high-low > 3) { \
181ac7a638eSSatish Balay         t = (low+high)/2; \
182ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
183ac7a638eSSatish Balay         else              low  = t; \
184ac7a638eSSatish Balay       } \
185ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
186ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
187ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
188ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
189ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
190ac7a638eSSatish Balay           else                    *bap  = value;  \
191ac7a638eSSatish Balay           goto b_noinsert; \
192ac7a638eSSatish Balay         } \
193ac7a638eSSatish Balay       } \
19489280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
195085a36d4SBarry Smith       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
196421e10b8SBarry Smith       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
197085a36d4SBarry Smith       CHKMEMQ;\
198ac7a638eSSatish Balay       N = nrow++ - 1;  \
199ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
200ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
201ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2023eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
203ac7a638eSSatish Balay       } \
2043eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
205ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
206ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
207ac7a638eSSatish Balay       b_noinsert:; \
208ac7a638eSSatish Balay     bilen[brow] = nrow; \
209ac7a638eSSatish Balay }
210ac7a638eSSatish Balay 
21193fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2124a2ae208SSatish Balay #undef __FUNCT__
2134a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
214b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
21557b952d6SSatish Balay {
2166fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
217dfbe8321SBarry Smith   PetscErrorCode ierr;
218b24ad042SBarry Smith   PetscInt       i,N = m*n;
2196fa18ffdSBarry Smith   MatScalar      *vsingle;
22093fea6afSBarry Smith 
22193fea6afSBarry Smith   PetscFunctionBegin;
2226fa18ffdSBarry Smith   if (N > b->setvalueslen) {
22305b42c5fSBarry Smith     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
22482502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2256fa18ffdSBarry Smith     b->setvalueslen  = N;
2266fa18ffdSBarry Smith   }
2276fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2286fa18ffdSBarry Smith 
22993fea6afSBarry Smith   for (i=0; i<N; i++) {
23093fea6afSBarry Smith     vsingle[i] = v[i];
23193fea6afSBarry Smith   }
23293fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
23393fea6afSBarry Smith   PetscFunctionReturn(0);
23493fea6afSBarry Smith }
23593fea6afSBarry Smith 
2364a2ae208SSatish Balay #undef __FUNCT__
2374a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
238b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
23993fea6afSBarry Smith {
2406fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
241dfbe8321SBarry Smith   PetscErrorCode ierr;
242b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
2436fa18ffdSBarry Smith   MatScalar      *vsingle;
24493fea6afSBarry Smith 
24593fea6afSBarry Smith   PetscFunctionBegin;
2466fa18ffdSBarry Smith   if (N > b->setvalueslen) {
24705b42c5fSBarry Smith     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
24882502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2496fa18ffdSBarry Smith     b->setvalueslen  = N;
2506fa18ffdSBarry Smith   }
2516fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
25293fea6afSBarry Smith   for (i=0; i<N; i++) {
25393fea6afSBarry Smith     vsingle[i] = v[i];
25493fea6afSBarry Smith   }
25593fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
25693fea6afSBarry Smith   PetscFunctionReturn(0);
25793fea6afSBarry Smith }
2586fa18ffdSBarry Smith 
2594a2ae208SSatish Balay #undef __FUNCT__
2604a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
261b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
2626fa18ffdSBarry Smith {
2636fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
264dfbe8321SBarry Smith   PetscErrorCode ierr;
265b24ad042SBarry Smith   PetscInt       i,N = m*n;
2666fa18ffdSBarry Smith   MatScalar      *vsingle;
2676fa18ffdSBarry Smith 
2686fa18ffdSBarry Smith   PetscFunctionBegin;
2696fa18ffdSBarry Smith   if (N > b->setvalueslen) {
27005b42c5fSBarry Smith     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
27182502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2726fa18ffdSBarry Smith     b->setvalueslen  = N;
2736fa18ffdSBarry Smith   }
2746fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2756fa18ffdSBarry Smith   for (i=0; i<N; i++) {
2766fa18ffdSBarry Smith     vsingle[i] = v[i];
2776fa18ffdSBarry Smith   }
2786fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
2796fa18ffdSBarry Smith   PetscFunctionReturn(0);
2806fa18ffdSBarry Smith }
2816fa18ffdSBarry Smith 
2824a2ae208SSatish Balay #undef __FUNCT__
2834a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
284b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
2856fa18ffdSBarry Smith {
2866fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
287dfbe8321SBarry Smith   PetscErrorCode ierr;
288b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
2896fa18ffdSBarry Smith   MatScalar      *vsingle;
2906fa18ffdSBarry Smith 
2916fa18ffdSBarry Smith   PetscFunctionBegin;
2926fa18ffdSBarry Smith   if (N > b->setvalueslen) {
29305b42c5fSBarry Smith     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
29482502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2956fa18ffdSBarry Smith     b->setvalueslen  = N;
2966fa18ffdSBarry Smith   }
2976fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2986fa18ffdSBarry Smith   for (i=0; i<N; i++) {
2996fa18ffdSBarry Smith     vsingle[i] = v[i];
3006fa18ffdSBarry Smith   }
3016fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3026fa18ffdSBarry Smith   PetscFunctionReturn(0);
3036fa18ffdSBarry Smith }
30493fea6afSBarry Smith #endif
30593fea6afSBarry Smith 
3064a2ae208SSatish Balay #undef __FUNCT__
307e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
308b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
30993fea6afSBarry Smith {
31057b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
31193fea6afSBarry Smith   MatScalar      value;
312273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
313dfbe8321SBarry Smith   PetscErrorCode ierr;
314b24ad042SBarry Smith   PetscInt       i,j,row,col;
315899cda47SBarry Smith   PetscInt       rstart_orig=mat->rmap.rstart;
316899cda47SBarry Smith   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
317899cda47SBarry Smith   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
31857b952d6SSatish Balay 
319eada6651SSatish Balay   /* Some Variables required in the macro */
32080c1aa95SSatish Balay   Mat            A = baij->A;
32180c1aa95SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
322b24ad042SBarry Smith   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3233eda8832SBarry Smith   MatScalar      *aa=a->a;
324ac7a638eSSatish Balay 
325ac7a638eSSatish Balay   Mat            B = baij->B;
326ac7a638eSSatish Balay   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
327b24ad042SBarry Smith   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3283eda8832SBarry Smith   MatScalar      *ba=b->a;
329ac7a638eSSatish Balay 
330b24ad042SBarry Smith   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
331b24ad042SBarry Smith   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
3323eda8832SBarry Smith   MatScalar      *ap,*bap;
33380c1aa95SSatish Balay 
334d64ed03dSBarry Smith   PetscFunctionBegin;
33557b952d6SSatish Balay   for (i=0; i<m; i++) {
3365ef9f2a5SBarry Smith     if (im[i] < 0) continue;
3372515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
338899cda47SBarry Smith     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
339639f9d9dSBarry Smith #endif
34057b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
34157b952d6SSatish Balay       row = im[i] - rstart_orig;
34257b952d6SSatish Balay       for (j=0; j<n; j++) {
34357b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
34457b952d6SSatish Balay           col = in[j] - cstart_orig;
34557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
346f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
34780c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
34873959e64SBarry Smith         } else if (in[j] < 0) continue;
3492515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
350899cda47SBarry Smith         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);}
351639f9d9dSBarry Smith #endif
35257b952d6SSatish Balay         else {
35357b952d6SSatish Balay           if (mat->was_assembled) {
354905e6a2fSBarry Smith             if (!baij->colmap) {
355905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
356905e6a2fSBarry Smith             }
357aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3580f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
359bba1ac68SSatish Balay             col  = col - 1;
36048e59246SSatish Balay #else
361bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
36248e59246SSatish Balay #endif
36357b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
36457b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3658295de27SSatish Balay               col =  in[j];
3669bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
3679bf004c3SSatish Balay               B = baij->B;
3689bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
3699bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
3709bf004c3SSatish Balay               ba=b->a;
371bba1ac68SSatish Balay             } else col += in[j]%bs;
3728295de27SSatish Balay           } else col = in[j];
37357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
37490da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
37590da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
37657b952d6SSatish Balay         }
37757b952d6SSatish Balay       }
378d64ed03dSBarry Smith     } else {
37990f02eecSBarry Smith       if (!baij->donotstash) {
380ff2fd236SBarry Smith         if (roworiented) {
3816fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
382ff2fd236SBarry Smith         } else {
3836fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
38457b952d6SSatish Balay         }
38557b952d6SSatish Balay       }
38657b952d6SSatish Balay     }
38790f02eecSBarry Smith   }
3883a40ed3dSBarry Smith   PetscFunctionReturn(0);
38957b952d6SSatish Balay }
39057b952d6SSatish Balay 
3914a2ae208SSatish Balay #undef __FUNCT__
3924a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
393b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
394ab26458aSBarry Smith {
395ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
396f15d580aSBarry Smith   const MatScalar *value;
397f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
398273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
399dfbe8321SBarry Smith   PetscErrorCode  ierr;
400899cda47SBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
401899cda47SBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
402899cda47SBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
403ab26458aSBarry Smith 
404b16ae2b1SBarry Smith   PetscFunctionBegin;
40530793edcSSatish Balay   if(!barray) {
40682502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
40782502324SSatish Balay     baij->barray = barray;
40830793edcSSatish Balay   }
40930793edcSSatish Balay 
410ab26458aSBarry Smith   if (roworiented) {
411ab26458aSBarry Smith     stepval = (n-1)*bs;
412ab26458aSBarry Smith   } else {
413ab26458aSBarry Smith     stepval = (m-1)*bs;
414ab26458aSBarry Smith   }
415ab26458aSBarry Smith   for (i=0; i<m; i++) {
4165ef9f2a5SBarry Smith     if (im[i] < 0) continue;
4172515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
41877431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
419ab26458aSBarry Smith #endif
420ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
421ab26458aSBarry Smith       row = im[i] - rstart;
422ab26458aSBarry Smith       for (j=0; j<n; j++) {
42315b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
42415b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
425f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
42615b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
427f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
42815b57d14SSatish Balay         } else { /* Here a copy is required */
429ab26458aSBarry Smith           if (roworiented) {
430ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
431ab26458aSBarry Smith           } else {
432ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
433abef11f7SSatish Balay           }
43447513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
43547513183SBarry Smith             for (jj=0; jj<bs; jj++) {
43630793edcSSatish Balay               *barray++  = *value++;
43747513183SBarry Smith             }
43847513183SBarry Smith           }
43930793edcSSatish Balay           barray -=bs2;
44015b57d14SSatish Balay         }
441abef11f7SSatish Balay 
442abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
443abef11f7SSatish Balay           col  = in[j] - cstart;
44493fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
445ab26458aSBarry Smith         }
4465ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
4472515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
44877431f27SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
449ab26458aSBarry Smith #endif
450ab26458aSBarry Smith         else {
451ab26458aSBarry Smith           if (mat->was_assembled) {
452ab26458aSBarry Smith             if (!baij->colmap) {
453ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
454ab26458aSBarry Smith             }
455a5eb4965SSatish Balay 
4562515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
457aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
458b24ad042SBarry Smith             { PetscInt data;
4590f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
46029bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
461fa46199cSSatish Balay             }
46248e59246SSatish Balay #else
46329bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
464a5eb4965SSatish Balay #endif
46548e59246SSatish Balay #endif
466aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4670f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
468fa46199cSSatish Balay             col  = (col - 1)/bs;
46948e59246SSatish Balay #else
470a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
47148e59246SSatish Balay #endif
472ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
473ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
474ab26458aSBarry Smith               col =  in[j];
475ab26458aSBarry Smith             }
476ab26458aSBarry Smith           }
477ab26458aSBarry Smith           else col = in[j];
47893fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
479ab26458aSBarry Smith         }
480ab26458aSBarry Smith       }
481d64ed03dSBarry Smith     } else {
482ab26458aSBarry Smith       if (!baij->donotstash) {
483ff2fd236SBarry Smith         if (roworiented) {
4846fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
485ff2fd236SBarry Smith         } else {
4866fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487ff2fd236SBarry Smith         }
488abef11f7SSatish Balay       }
489ab26458aSBarry Smith     }
490ab26458aSBarry Smith   }
4913a40ed3dSBarry Smith   PetscFunctionReturn(0);
492ab26458aSBarry Smith }
4936fa18ffdSBarry Smith 
4940bdbc534SSatish Balay #define HASH_KEY 0.6180339887
495b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
496b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
497b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
4984a2ae208SSatish Balay #undef __FUNCT__
4994a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
500b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
5010bdbc534SSatish Balay {
5020bdbc534SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
503273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
504dfbe8321SBarry Smith   PetscErrorCode ierr;
505b24ad042SBarry Smith   PetscInt       i,j,row,col;
506899cda47SBarry Smith   PetscInt       rstart_orig=mat->rmap.rstart;
507899cda47SBarry Smith   PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
508899cda47SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
509329f5518SBarry Smith   PetscReal      tmp;
5103eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
5112515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
512b24ad042SBarry Smith   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5134a15367fSSatish Balay #endif
5140bdbc534SSatish Balay 
5150bdbc534SSatish Balay   PetscFunctionBegin;
5160bdbc534SSatish Balay 
5170bdbc534SSatish Balay   for (i=0; i<m; i++) {
5182515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
51929bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
520899cda47SBarry Smith     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
5210bdbc534SSatish Balay #endif
5220bdbc534SSatish Balay       row = im[i];
523c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5240bdbc534SSatish Balay       for (j=0; j<n; j++) {
5250bdbc534SSatish Balay         col = in[j];
5266fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
527b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
528c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
529c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5300bdbc534SSatish Balay 
531c2760754SSatish Balay 
532c2760754SSatish Balay         idx = h1;
5332515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
534187ce0cbSSatish Balay         insert_ct++;
535187ce0cbSSatish Balay         total_ct++;
536187ce0cbSSatish Balay         if (HT[idx] != key) {
537187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
538187ce0cbSSatish Balay           if (idx == size) {
539187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
540187ce0cbSSatish Balay             if (idx == h1) {
54177431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
542187ce0cbSSatish Balay             }
543187ce0cbSSatish Balay           }
544187ce0cbSSatish Balay         }
545187ce0cbSSatish Balay #else
546c2760754SSatish Balay         if (HT[idx] != key) {
547c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
548c2760754SSatish Balay           if (idx == size) {
549c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
550c2760754SSatish Balay             if (idx == h1) {
55177431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
552c2760754SSatish Balay             }
553c2760754SSatish Balay           }
554c2760754SSatish Balay         }
555187ce0cbSSatish Balay #endif
556c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
557c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
558c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
5590bdbc534SSatish Balay       }
5600bdbc534SSatish Balay     } else {
5610bdbc534SSatish Balay       if (!baij->donotstash) {
562ff2fd236SBarry Smith         if (roworiented) {
5638798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
564ff2fd236SBarry Smith         } else {
5658798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5660bdbc534SSatish Balay         }
5670bdbc534SSatish Balay       }
5680bdbc534SSatish Balay     }
5690bdbc534SSatish Balay   }
5702515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
571187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
572187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
573187ce0cbSSatish Balay #endif
5740bdbc534SSatish Balay   PetscFunctionReturn(0);
5750bdbc534SSatish Balay }
5760bdbc534SSatish Balay 
5774a2ae208SSatish Balay #undef __FUNCT__
5784a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
579b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
5800bdbc534SSatish Balay {
5810bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
582273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
583dfbe8321SBarry Smith   PetscErrorCode  ierr;
584b24ad042SBarry Smith   PetscInt        i,j,ii,jj,row,col;
585899cda47SBarry Smith   PetscInt        rstart=baij->rstartbs;
586ab715e2cSSatish Balay   PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
587b24ad042SBarry Smith   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
588329f5518SBarry Smith   PetscReal       tmp;
5893eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
590f15d580aSBarry Smith   const MatScalar *v_t,*value;
5912515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
592b24ad042SBarry Smith   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5934a15367fSSatish Balay #endif
5940bdbc534SSatish Balay 
595d0a41580SSatish Balay   PetscFunctionBegin;
596d0a41580SSatish Balay 
5970bdbc534SSatish Balay   if (roworiented) {
5980bdbc534SSatish Balay     stepval = (n-1)*bs;
5990bdbc534SSatish Balay   } else {
6000bdbc534SSatish Balay     stepval = (m-1)*bs;
6010bdbc534SSatish Balay   }
6020bdbc534SSatish Balay   for (i=0; i<m; i++) {
6032515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
60477431f27SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
60577431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
6060bdbc534SSatish Balay #endif
6070bdbc534SSatish Balay     row   = im[i];
608ab715e2cSSatish Balay     v_t   = v + i*nbs2;
609c2760754SSatish Balay     if (row >= rstart && row < rend) {
6100bdbc534SSatish Balay       for (j=0; j<n; j++) {
6110bdbc534SSatish Balay         col = in[j];
6120bdbc534SSatish Balay 
6130bdbc534SSatish Balay         /* Look up into the Hash Table */
614c2760754SSatish Balay         key = row*Nbs+col+1;
615c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6160bdbc534SSatish Balay 
617c2760754SSatish Balay         idx = h1;
6182515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
619187ce0cbSSatish Balay         total_ct++;
620187ce0cbSSatish Balay         insert_ct++;
621187ce0cbSSatish Balay        if (HT[idx] != key) {
622187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
623187ce0cbSSatish Balay           if (idx == size) {
624187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
625187ce0cbSSatish Balay             if (idx == h1) {
62677431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
627187ce0cbSSatish Balay             }
628187ce0cbSSatish Balay           }
629187ce0cbSSatish Balay         }
630187ce0cbSSatish Balay #else
631c2760754SSatish Balay         if (HT[idx] != key) {
632c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
633c2760754SSatish Balay           if (idx == size) {
634c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
635c2760754SSatish Balay             if (idx == h1) {
63677431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
637c2760754SSatish Balay             }
638c2760754SSatish Balay           }
639c2760754SSatish Balay         }
640187ce0cbSSatish Balay #endif
641c2760754SSatish Balay         baij_a = HD[idx];
6420bdbc534SSatish Balay         if (roworiented) {
643c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
644187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
645187ce0cbSSatish Balay           value = v_t;
646187ce0cbSSatish Balay           v_t  += bs;
647fef45726SSatish Balay           if (addv == ADD_VALUES) {
648c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
649c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
650fef45726SSatish Balay                 baij_a[jj]  += *value++;
651b4cc0f5aSSatish Balay               }
652b4cc0f5aSSatish Balay             }
653fef45726SSatish Balay           } else {
654c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
655c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
656fef45726SSatish Balay                 baij_a[jj]  = *value++;
657fef45726SSatish Balay               }
658fef45726SSatish Balay             }
659fef45726SSatish Balay           }
6600bdbc534SSatish Balay         } else {
6610bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
662fef45726SSatish Balay           if (addv == ADD_VALUES) {
663b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
6640bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
665fef45726SSatish Balay                 baij_a[jj]  += *value++;
666fef45726SSatish Balay               }
667fef45726SSatish Balay             }
668fef45726SSatish Balay           } else {
669fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
670fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
671fef45726SSatish Balay                 baij_a[jj]  = *value++;
672fef45726SSatish Balay               }
673b4cc0f5aSSatish Balay             }
6740bdbc534SSatish Balay           }
6750bdbc534SSatish Balay         }
6760bdbc534SSatish Balay       }
6770bdbc534SSatish Balay     } else {
6780bdbc534SSatish Balay       if (!baij->donotstash) {
6790bdbc534SSatish Balay         if (roworiented) {
6808798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6810bdbc534SSatish Balay         } else {
6828798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6830bdbc534SSatish Balay         }
6840bdbc534SSatish Balay       }
6850bdbc534SSatish Balay     }
6860bdbc534SSatish Balay   }
6872515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
688187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
689187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
690187ce0cbSSatish Balay #endif
6910bdbc534SSatish Balay   PetscFunctionReturn(0);
6920bdbc534SSatish Balay }
693133cdb44SSatish Balay 
6944a2ae208SSatish Balay #undef __FUNCT__
6954a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
696b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
697d6de1c52SSatish Balay {
698d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
6996849ba73SBarry Smith   PetscErrorCode ierr;
700899cda47SBarry Smith   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
701899cda47SBarry Smith   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
702d6de1c52SSatish Balay 
703133cdb44SSatish Balay   PetscFunctionBegin;
704d6de1c52SSatish Balay   for (i=0; i<m; i++) {
70597e567efSBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
706899cda47SBarry Smith     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
707d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
708d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
709d6de1c52SSatish Balay       for (j=0; j<n; j++) {
71097e567efSBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
711899cda47SBarry Smith         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
712d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
713d6de1c52SSatish Balay           col = idxn[j] - bscstart;
71498dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
715d64ed03dSBarry Smith         } else {
716905e6a2fSBarry Smith           if (!baij->colmap) {
717905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
718905e6a2fSBarry Smith           }
719aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7200f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
721fa46199cSSatish Balay           data --;
72248e59246SSatish Balay #else
72348e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
72448e59246SSatish Balay #endif
72548e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
726d9d09a02SSatish Balay           else {
72748e59246SSatish Balay             col  = data + idxn[j]%bs;
72898dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
729d6de1c52SSatish Balay           }
730d6de1c52SSatish Balay         }
731d6de1c52SSatish Balay       }
732d64ed03dSBarry Smith     } else {
73329bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
734d6de1c52SSatish Balay     }
735d6de1c52SSatish Balay   }
7363a40ed3dSBarry Smith  PetscFunctionReturn(0);
737d6de1c52SSatish Balay }
738d6de1c52SSatish Balay 
7394a2ae208SSatish Balay #undef __FUNCT__
7404a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
741dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
742d6de1c52SSatish Balay {
743d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
744d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
745dfbe8321SBarry Smith   PetscErrorCode ierr;
746899cda47SBarry Smith   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
747329f5518SBarry Smith   PetscReal      sum = 0.0;
7483eda8832SBarry Smith   MatScalar      *v;
749d6de1c52SSatish Balay 
750d64ed03dSBarry Smith   PetscFunctionBegin;
751d6de1c52SSatish Balay   if (baij->size == 1) {
752064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
753d6de1c52SSatish Balay   } else {
754d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
755d6de1c52SSatish Balay       v = amat->a;
7568a62d963SHong Zhang       nz = amat->nz*bs2;
7578a62d963SHong Zhang       for (i=0; i<nz; i++) {
758aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
759329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
760d6de1c52SSatish Balay #else
761d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
762d6de1c52SSatish Balay #endif
763d6de1c52SSatish Balay       }
764d6de1c52SSatish Balay       v = bmat->a;
7658a62d963SHong Zhang       nz = bmat->nz*bs2;
7668a62d963SHong Zhang       for (i=0; i<nz; i++) {
767aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
768329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
769d6de1c52SSatish Balay #else
770d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
771d6de1c52SSatish Balay #endif
772d6de1c52SSatish Balay       }
7737adad957SLisandro Dalcin       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
774064f8208SBarry Smith       *nrm = sqrt(*nrm);
7758a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
7768a62d963SHong Zhang       PetscReal *tmp,*tmp2;
777899cda47SBarry Smith       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
778899cda47SBarry Smith       ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
779899cda47SBarry Smith       tmp2 = tmp + mat->cmap.N;
780899cda47SBarry Smith       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
7818a62d963SHong Zhang       v = amat->a; jj = amat->j;
7828a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
7838a62d963SHong Zhang         for (j=0; j<bs; j++){
7848a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
7858a62d963SHong Zhang           for (row=0; row<bs; row++){
7868a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
7878a62d963SHong Zhang           }
7888a62d963SHong Zhang         }
7898a62d963SHong Zhang         jj++;
7908a62d963SHong Zhang       }
7918a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
7928a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
7938a62d963SHong Zhang         for (j=0; j<bs; j++){
7948a62d963SHong Zhang           col = bs*garray[*jj] + j;
7958a62d963SHong Zhang           for (row=0; row<bs; row++){
7968a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
7978a62d963SHong Zhang           }
7988a62d963SHong Zhang         }
7998a62d963SHong Zhang         jj++;
8008a62d963SHong Zhang       }
8017adad957SLisandro Dalcin       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
8028a62d963SHong Zhang       *nrm = 0.0;
803899cda47SBarry Smith       for (j=0; j<mat->cmap.N; j++) {
8048a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8058a62d963SHong Zhang       }
8068a62d963SHong Zhang       ierr = PetscFree(tmp);CHKERRQ(ierr);
8078a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
808577dd1f9SKris Buschelman       PetscReal *sums;
809577dd1f9SKris Buschelman       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
8108a62d963SHong Zhang       sum = 0.0;
8118a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
8128a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
8138a62d963SHong Zhang         v = amat->a + bs2*amat->i[j];
8148a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
8158a62d963SHong Zhang         for (i=0; i<nz; i++) {
8168a62d963SHong Zhang           for (col=0; col<bs; col++){
8178a62d963SHong Zhang             for (row=0; row<bs; row++){
8188a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
8198a62d963SHong Zhang             }
8208a62d963SHong Zhang           }
8218a62d963SHong Zhang         }
8228a62d963SHong Zhang         v = bmat->a + bs2*bmat->i[j];
8238a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
8248a62d963SHong Zhang         for (i=0; i<nz; i++) {
8258a62d963SHong Zhang           for (col=0; col<bs; col++){
8268a62d963SHong Zhang             for (row=0; row<bs; row++){
8278a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
8288a62d963SHong Zhang             }
8298a62d963SHong Zhang           }
8308a62d963SHong Zhang         }
8318a62d963SHong Zhang         for (row=0; row<bs; row++){
8328a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
8338a62d963SHong Zhang         }
8348a62d963SHong Zhang       }
8357adad957SLisandro Dalcin       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
836577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
837d64ed03dSBarry Smith     } else {
83829bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
839d6de1c52SSatish Balay     }
840d64ed03dSBarry Smith   }
8413a40ed3dSBarry Smith   PetscFunctionReturn(0);
842d6de1c52SSatish Balay }
84357b952d6SSatish Balay 
844fef45726SSatish Balay /*
845fef45726SSatish Balay   Creates the hash table, and sets the table
846fef45726SSatish Balay   This table is created only once.
847fef45726SSatish Balay   If new entried need to be added to the matrix
848fef45726SSatish Balay   then the hash table has to be destroyed and
849fef45726SSatish Balay   recreated.
850fef45726SSatish Balay */
8514a2ae208SSatish Balay #undef __FUNCT__
8524a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
853dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
854596b8d2eSBarry Smith {
855596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
856596b8d2eSBarry Smith   Mat            A = baij->A,B=baij->B;
857596b8d2eSBarry Smith   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
858b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
8596849ba73SBarry Smith   PetscErrorCode ierr;
860899cda47SBarry Smith   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
861899cda47SBarry Smith   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
862b24ad042SBarry Smith   PetscInt       *HT,key;
8633eda8832SBarry Smith   MatScalar      **HD;
864329f5518SBarry Smith   PetscReal      tmp;
8656cf91177SBarry Smith #if defined(PETSC_USE_INFO)
866b24ad042SBarry Smith   PetscInt       ct=0,max=0;
8674a15367fSSatish Balay #endif
868fef45726SSatish Balay 
869d64ed03dSBarry Smith   PetscFunctionBegin;
870b24ad042SBarry Smith   baij->ht_size=(PetscInt)(factor*nz);
8710bdbc534SSatish Balay   size = baij->ht_size;
872fef45726SSatish Balay 
8730bdbc534SSatish Balay   if (baij->ht) {
8740bdbc534SSatish Balay     PetscFunctionReturn(0);
875596b8d2eSBarry Smith   }
8760bdbc534SSatish Balay 
877fef45726SSatish Balay   /* Allocate Memory for Hash Table */
878b24ad042SBarry Smith   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
879b24ad042SBarry Smith   baij->ht = (PetscInt*)(baij->hd + size);
880b9e4cc15SSatish Balay   HD       = baij->hd;
881a07cd24cSSatish Balay   HT       = baij->ht;
882b9e4cc15SSatish Balay 
883b9e4cc15SSatish Balay 
884b24ad042SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
8850bdbc534SSatish Balay 
886596b8d2eSBarry Smith 
887596b8d2eSBarry Smith   /* Loop Over A */
8880bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
889596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8900bdbc534SSatish Balay       row = i+rstart;
8910bdbc534SSatish Balay       col = aj[j]+cstart;
892596b8d2eSBarry Smith 
893187ce0cbSSatish Balay       key = row*Nbs + col + 1;
894c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8950bdbc534SSatish Balay       for (k=0; k<size; k++){
896958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
8970bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8980bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
899596b8d2eSBarry Smith           break;
9006cf91177SBarry Smith #if defined(PETSC_USE_INFO)
901187ce0cbSSatish Balay         } else {
902187ce0cbSSatish Balay           ct++;
903187ce0cbSSatish Balay #endif
904596b8d2eSBarry Smith         }
905187ce0cbSSatish Balay       }
9066cf91177SBarry Smith #if defined(PETSC_USE_INFO)
907187ce0cbSSatish Balay       if (k> max) max = k;
908187ce0cbSSatish Balay #endif
909596b8d2eSBarry Smith     }
910596b8d2eSBarry Smith   }
911596b8d2eSBarry Smith   /* Loop Over B */
9120bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
913596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9140bdbc534SSatish Balay       row = i+rstart;
9150bdbc534SSatish Balay       col = garray[bj[j]];
916187ce0cbSSatish Balay       key = row*Nbs + col + 1;
917c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9180bdbc534SSatish Balay       for (k=0; k<size; k++){
919958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9200bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9210bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
922596b8d2eSBarry Smith           break;
9236cf91177SBarry Smith #if defined(PETSC_USE_INFO)
924187ce0cbSSatish Balay         } else {
925187ce0cbSSatish Balay           ct++;
926187ce0cbSSatish Balay #endif
927596b8d2eSBarry Smith         }
928187ce0cbSSatish Balay       }
9296cf91177SBarry Smith #if defined(PETSC_USE_INFO)
930187ce0cbSSatish Balay       if (k> max) max = k;
931187ce0cbSSatish Balay #endif
932596b8d2eSBarry Smith     }
933596b8d2eSBarry Smith   }
934596b8d2eSBarry Smith 
935596b8d2eSBarry Smith   /* Print Summary */
9366cf91177SBarry Smith #if defined(PETSC_USE_INFO)
937c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
938596b8d2eSBarry Smith     if (HT[i]) {j++;}
939c38d4ed2SBarry Smith   }
9401e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
941187ce0cbSSatish Balay #endif
9423a40ed3dSBarry Smith   PetscFunctionReturn(0);
943596b8d2eSBarry Smith }
94457b952d6SSatish Balay 
9454a2ae208SSatish Balay #undef __FUNCT__
9464a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
947dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
948bbb85fb3SSatish Balay {
949bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
950dfbe8321SBarry Smith   PetscErrorCode ierr;
951b24ad042SBarry Smith   PetscInt       nstash,reallocs;
952bbb85fb3SSatish Balay   InsertMode     addv;
953bbb85fb3SSatish Balay 
954bbb85fb3SSatish Balay   PetscFunctionBegin;
955bbb85fb3SSatish Balay   if (baij->donotstash) {
956bbb85fb3SSatish Balay     PetscFunctionReturn(0);
957bbb85fb3SSatish Balay   }
958bbb85fb3SSatish Balay 
959bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
9607adad957SLisandro Dalcin   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
961bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
96229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
963bbb85fb3SSatish Balay   }
964bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
965bbb85fb3SSatish Balay 
9661e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
9671e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
9688798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
9691e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
97046680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
9711e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
972bbb85fb3SSatish Balay   PetscFunctionReturn(0);
973bbb85fb3SSatish Balay }
974bbb85fb3SSatish Balay 
9754a2ae208SSatish Balay #undef __FUNCT__
9764a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
977dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
978bbb85fb3SSatish Balay {
979bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
98091c97fd4SSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
9816849ba73SBarry Smith   PetscErrorCode ierr;
982b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
983b24ad042SBarry Smith   PetscInt       *row,*col,other_disassembled;
9847c922b88SBarry Smith   PetscTruth     r1,r2,r3;
9853eda8832SBarry Smith   MatScalar      *val;
986bbb85fb3SSatish Balay   InsertMode     addv = mat->insertmode;
987b24ad042SBarry Smith   PetscMPIInt    n;
988bbb85fb3SSatish Balay 
98991c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
990bbb85fb3SSatish Balay   PetscFunctionBegin;
991bbb85fb3SSatish Balay   if (!baij->donotstash) {
992a2d1c673SSatish Balay     while (1) {
9938798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
994a2d1c673SSatish Balay       if (!flg) break;
995a2d1c673SSatish Balay 
996bbb85fb3SSatish Balay       for (i=0; i<n;) {
997bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
998bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
999bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
1000bbb85fb3SSatish Balay         else       ncols = n-i;
1001bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
100293fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1003bbb85fb3SSatish Balay         i = j;
1004bbb85fb3SSatish Balay       }
1005bbb85fb3SSatish Balay     }
10068798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1007a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1008a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1009a2d1c673SSatish Balay        restore the original flags */
1010a2d1c673SSatish Balay     r1 = baij->roworiented;
1011a2d1c673SSatish Balay     r2 = a->roworiented;
101291c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
10137c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10147c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
101591c97fd4SSatish Balay     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
1016a2d1c673SSatish Balay     while (1) {
10178798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1018a2d1c673SSatish Balay       if (!flg) break;
1019a2d1c673SSatish Balay 
1020a2d1c673SSatish Balay       for (i=0; i<n;) {
1021a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1022a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1023a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1024a2d1c673SSatish Balay         else       ncols = n-i;
102593fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1026a2d1c673SSatish Balay         i = j;
1027a2d1c673SSatish Balay       }
1028a2d1c673SSatish Balay     }
10298798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1030a2d1c673SSatish Balay     baij->roworiented = r1;
1031a2d1c673SSatish Balay     a->roworiented    = r2;
103291c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
1033bbb85fb3SSatish Balay   }
1034bbb85fb3SSatish Balay 
1035bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1036bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1037bbb85fb3SSatish Balay 
1038bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1039bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1040bbb85fb3SSatish Balay   /*
1041bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1042bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1043bbb85fb3SSatish Balay   */
1044bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
10457adad957SLisandro Dalcin     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1047bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1048bbb85fb3SSatish Balay     }
1049bbb85fb3SSatish Balay   }
1050bbb85fb3SSatish Balay 
1051bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1052bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1053bbb85fb3SSatish Balay   }
105491c97fd4SSatish Balay   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
1055bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1056bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1057bbb85fb3SSatish Balay 
10586cf91177SBarry Smith #if defined(PETSC_USE_INFO)
1059bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
10601e2582c4SBarry Smith     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
1061bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1062bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1063bbb85fb3SSatish Balay   }
1064bbb85fb3SSatish Balay #endif
1065bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1066bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1067bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1068bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1069bbb85fb3SSatish Balay   }
1070bbb85fb3SSatish Balay 
1071606d414cSSatish Balay   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1072606d414cSSatish Balay   baij->rowvalues = 0;
1073bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1074bbb85fb3SSatish Balay }
107557b952d6SSatish Balay 
10764a2ae208SSatish Balay #undef __FUNCT__
10774a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
10786849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
107957b952d6SSatish Balay {
108057b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1081dfbe8321SBarry Smith   PetscErrorCode    ierr;
1082b24ad042SBarry Smith   PetscMPIInt       size = baij->size,rank = baij->rank;
1083899cda47SBarry Smith   PetscInt          bs = mat->rmap.bs;
108432077d6dSBarry Smith   PetscTruth        iascii,isdraw;
1085b0a32e0cSBarry Smith   PetscViewer       sviewer;
1086f3ef73ceSBarry Smith   PetscViewerFormat format;
108757b952d6SSatish Balay 
1088d64ed03dSBarry Smith   PetscFunctionBegin;
108932077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1090fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
109132077d6dSBarry Smith   if (iascii) {
1092b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1093456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
10944e220ebcSLois Curfman McInnes       MatInfo info;
10957adad957SLisandro Dalcin       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1096d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
109777431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1098899cda47SBarry Smith               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1099899cda47SBarry Smith               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
1100d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
110177431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1102d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
110377431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1104b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
110507d81ca4SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
110657b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
11073a40ed3dSBarry Smith       PetscFunctionReturn(0);
1108fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
110977431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
11103a40ed3dSBarry Smith       PetscFunctionReturn(0);
111104929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
111204929863SHong Zhang       PetscFunctionReturn(0);
111357b952d6SSatish Balay     }
111457b952d6SSatish Balay   }
111557b952d6SSatish Balay 
11160f5bd95cSBarry Smith   if (isdraw) {
1117b0a32e0cSBarry Smith     PetscDraw       draw;
111857b952d6SSatish Balay     PetscTruth isnull;
1119b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1120b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
112157b952d6SSatish Balay   }
112257b952d6SSatish Balay 
112357b952d6SSatish Balay   if (size == 1) {
11247adad957SLisandro Dalcin     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
112557b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1126d64ed03dSBarry Smith   } else {
112757b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
112857b952d6SSatish Balay     Mat         A;
112957b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1130899cda47SBarry Smith     PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11313eda8832SBarry Smith     MatScalar   *a;
113257b952d6SSatish Balay 
1133f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1134f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
11357adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
113657b952d6SSatish Balay     if (!rank) {
1137f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1138d64ed03dSBarry Smith     } else {
1139f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
114057b952d6SSatish Balay     }
1141f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1142899cda47SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
114352e6d16bSBarry Smith     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
114457b952d6SSatish Balay 
114557b952d6SSatish Balay     /* copy over the A part */
114657b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
114757b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148b24ad042SBarry Smith     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
114957b952d6SSatish Balay 
115057b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1151899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
115257b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
115357b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1154899cda47SBarry Smith         col = (baij->cstartbs+aj[j])*bs;
115557b952d6SSatish Balay         for (k=0; k<bs; k++) {
115693fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1157cee3aa6bSSatish Balay           col++; a += bs;
115857b952d6SSatish Balay         }
115957b952d6SSatish Balay       }
116057b952d6SSatish Balay     }
116157b952d6SSatish Balay     /* copy over the B part */
116257b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
116357b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
116457b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1165899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
116657b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
116757b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
116857b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
116957b952d6SSatish Balay         for (k=0; k<bs; k++) {
117093fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1171cee3aa6bSSatish Balay           col++; a += bs;
117257b952d6SSatish Balay         }
117357b952d6SSatish Balay       }
117457b952d6SSatish Balay     }
1175606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11766d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11776d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
117855843e3eSBarry Smith     /*
117955843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1180b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
118155843e3eSBarry Smith     */
1182b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183f14a1c24SBarry Smith     if (!rank) {
11847adad957SLisandro Dalcin       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
11856831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
118657b952d6SSatish Balay     }
1187b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
118857b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
118957b952d6SSatish Balay   }
11903a40ed3dSBarry Smith   PetscFunctionReturn(0);
119157b952d6SSatish Balay }
119257b952d6SSatish Balay 
11934a2ae208SSatish Balay #undef __FUNCT__
11944a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1195dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
119657b952d6SSatish Balay {
1197dfbe8321SBarry Smith   PetscErrorCode ierr;
119832077d6dSBarry Smith   PetscTruth     iascii,isdraw,issocket,isbinary;
119957b952d6SSatish Balay 
1200d64ed03dSBarry Smith   PetscFunctionBegin;
120132077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1202fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1203b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1204fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
120532077d6dSBarry Smith   if (iascii || isdraw || issocket || isbinary) {
12067b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
12075cd90555SBarry Smith   } else {
120879a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
120957b952d6SSatish Balay   }
12103a40ed3dSBarry Smith   PetscFunctionReturn(0);
121157b952d6SSatish Balay }
121257b952d6SSatish Balay 
12134a2ae208SSatish Balay #undef __FUNCT__
12144a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1215dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
121679bdfe76SSatish Balay {
121779bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1218dfbe8321SBarry Smith   PetscErrorCode ierr;
121979bdfe76SSatish Balay 
1220d64ed03dSBarry Smith   PetscFunctionBegin;
1221aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1222899cda47SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
122379bdfe76SSatish Balay #endif
12248798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12258798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
122679bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
122779bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1228aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12299c666560SBarry Smith   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
123048e59246SSatish Balay #else
123105b42c5fSBarry Smith   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
123248e59246SSatish Balay #endif
123305b42c5fSBarry Smith   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1234606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1235606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
123605b42c5fSBarry Smith   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
123705b42c5fSBarry Smith   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
123805b42c5fSBarry Smith   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
12396fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
124005b42c5fSBarry Smith   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
12416fa18ffdSBarry Smith #endif
1242899cda47SBarry Smith   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1243606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1244901853e0SKris Buschelman 
1245dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1246901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1247901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1248901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1249901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1250aac34f13SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1251901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1252901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
12533a40ed3dSBarry Smith   PetscFunctionReturn(0);
125479bdfe76SSatish Balay }
125579bdfe76SSatish Balay 
12564a2ae208SSatish Balay #undef __FUNCT__
12574a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1258dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1259cee3aa6bSSatish Balay {
1260cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1261dfbe8321SBarry Smith   PetscErrorCode ierr;
1262b24ad042SBarry Smith   PetscInt       nt;
1263cee3aa6bSSatish Balay 
1264d64ed03dSBarry Smith   PetscFunctionBegin;
1265e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1266899cda47SBarry Smith   if (nt != A->cmap.n) {
126729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
126847b4a8eaSLois Curfman McInnes   }
1269e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1270899cda47SBarry Smith   if (nt != A->rmap.n) {
127129bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
127247b4a8eaSLois Curfman McInnes   }
1273ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1274f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1275ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
12773a40ed3dSBarry Smith   PetscFunctionReturn(0);
1278cee3aa6bSSatish Balay }
1279cee3aa6bSSatish Balay 
12804a2ae208SSatish Balay #undef __FUNCT__
12814a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1282dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1283cee3aa6bSSatish Balay {
1284cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1285dfbe8321SBarry Smith   PetscErrorCode ierr;
1286d64ed03dSBarry Smith 
1287d64ed03dSBarry Smith   PetscFunctionBegin;
1288ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1289f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1290ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1291f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12923a40ed3dSBarry Smith   PetscFunctionReturn(0);
1293cee3aa6bSSatish Balay }
1294cee3aa6bSSatish Balay 
12954a2ae208SSatish Balay #undef __FUNCT__
12964a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1297dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1298cee3aa6bSSatish Balay {
1299cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1300dfbe8321SBarry Smith   PetscErrorCode ierr;
1301a5ff213dSBarry Smith   PetscTruth     merged;
1302cee3aa6bSSatish Balay 
1303d64ed03dSBarry Smith   PetscFunctionBegin;
1304a5ff213dSBarry Smith   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1305cee3aa6bSSatish Balay   /* do nondiagonal part */
13067c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1307a5ff213dSBarry Smith   if (!merged) {
1308cee3aa6bSSatish Balay     /* send it on its way */
1309ca9f406cSSatish Balay     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1310cee3aa6bSSatish Balay     /* do local part */
13117c922b88SBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1312cee3aa6bSSatish Balay     /* receive remote parts: note this assumes the values are not actually */
1313a5ff213dSBarry Smith     /* inserted in yy until the next line */
1314ca9f406cSSatish Balay     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1315a5ff213dSBarry Smith   } else {
1316a5ff213dSBarry Smith     /* do local part */
1317a5ff213dSBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1318a5ff213dSBarry Smith     /* send it on its way */
1319ca9f406cSSatish Balay     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1320a5ff213dSBarry Smith     /* values actually were received in the Begin() but we need to call this nop */
1321ca9f406cSSatish Balay     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1322a5ff213dSBarry Smith   }
13233a40ed3dSBarry Smith   PetscFunctionReturn(0);
1324cee3aa6bSSatish Balay }
1325cee3aa6bSSatish Balay 
13264a2ae208SSatish Balay #undef __FUNCT__
13274a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1328dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1329cee3aa6bSSatish Balay {
1330cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331dfbe8321SBarry Smith   PetscErrorCode ierr;
1332cee3aa6bSSatish Balay 
1333d64ed03dSBarry Smith   PetscFunctionBegin;
1334cee3aa6bSSatish Balay   /* do nondiagonal part */
13357c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1336cee3aa6bSSatish Balay   /* send it on its way */
1337ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1338cee3aa6bSSatish Balay   /* do local part */
13397c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1340cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1341cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1342cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1343ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
13443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1345cee3aa6bSSatish Balay }
1346cee3aa6bSSatish Balay 
1347cee3aa6bSSatish Balay /*
1348cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1349cee3aa6bSSatish Balay    diagonal block
1350cee3aa6bSSatish Balay */
13514a2ae208SSatish Balay #undef __FUNCT__
13524a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1353dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1354cee3aa6bSSatish Balay {
1355cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1356dfbe8321SBarry Smith   PetscErrorCode ierr;
1357d64ed03dSBarry Smith 
1358d64ed03dSBarry Smith   PetscFunctionBegin;
1359899cda47SBarry Smith   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13603a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13613a40ed3dSBarry Smith   PetscFunctionReturn(0);
1362cee3aa6bSSatish Balay }
1363cee3aa6bSSatish Balay 
13644a2ae208SSatish Balay #undef __FUNCT__
13654a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1366f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1367cee3aa6bSSatish Balay {
1368cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369dfbe8321SBarry Smith   PetscErrorCode ierr;
1370d64ed03dSBarry Smith 
1371d64ed03dSBarry Smith   PetscFunctionBegin;
1372f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1373f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
13743a40ed3dSBarry Smith   PetscFunctionReturn(0);
1375cee3aa6bSSatish Balay }
1376026e39d0SSatish Balay 
13774a2ae208SSatish Balay #undef __FUNCT__
13784a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
1379b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380acdf5bf4SSatish Balay {
1381acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
138287828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
13836849ba73SBarry Smith   PetscErrorCode ierr;
1384899cda47SBarry Smith   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1385899cda47SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1386899cda47SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1387acdf5bf4SSatish Balay 
1388d64ed03dSBarry Smith   PetscFunctionBegin;
1389abc0a331SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1390acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1391acdf5bf4SSatish Balay 
1392acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1393acdf5bf4SSatish Balay     /*
1394acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1395acdf5bf4SSatish Balay     */
1396acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1397b24ad042SBarry Smith     PetscInt     max = 1,mbs = mat->mbs,tmp;
1398bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1399acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1400acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1401acdf5bf4SSatish Balay     }
1402b24ad042SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1403b24ad042SBarry Smith     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1404acdf5bf4SSatish Balay   }
1405acdf5bf4SSatish Balay 
140629bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1407d9d09a02SSatish Balay   lrow = row - brstart;
1408acdf5bf4SSatish Balay 
1409acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1410acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1411acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1412f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1413f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1414acdf5bf4SSatish Balay   nztot = nzA + nzB;
1415acdf5bf4SSatish Balay 
1416acdf5bf4SSatish Balay   cmap  = mat->garray;
1417acdf5bf4SSatish Balay   if (v  || idx) {
1418acdf5bf4SSatish Balay     if (nztot) {
1419acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1420b24ad042SBarry Smith       PetscInt imark = -1;
1421acdf5bf4SSatish Balay       if (v) {
1422acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1423acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1424d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1425acdf5bf4SSatish Balay           else break;
1426acdf5bf4SSatish Balay         }
1427acdf5bf4SSatish Balay         imark = i;
1428acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1429acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1430acdf5bf4SSatish Balay       }
1431acdf5bf4SSatish Balay       if (idx) {
1432acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1433acdf5bf4SSatish Balay         if (imark > -1) {
1434acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1435bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1436acdf5bf4SSatish Balay           }
1437acdf5bf4SSatish Balay         } else {
1438acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1439d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1440d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1441acdf5bf4SSatish Balay             else break;
1442acdf5bf4SSatish Balay           }
1443acdf5bf4SSatish Balay           imark = i;
1444acdf5bf4SSatish Balay         }
1445d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1446d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1447acdf5bf4SSatish Balay       }
1448d64ed03dSBarry Smith     } else {
1449d212a18eSSatish Balay       if (idx) *idx = 0;
1450d212a18eSSatish Balay       if (v)   *v   = 0;
1451d212a18eSSatish Balay     }
1452acdf5bf4SSatish Balay   }
1453acdf5bf4SSatish Balay   *nz = nztot;
1454f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14563a40ed3dSBarry Smith   PetscFunctionReturn(0);
1457acdf5bf4SSatish Balay }
1458acdf5bf4SSatish Balay 
14594a2ae208SSatish Balay #undef __FUNCT__
14604a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1461b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462acdf5bf4SSatish Balay {
1463acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1464d64ed03dSBarry Smith 
1465d64ed03dSBarry Smith   PetscFunctionBegin;
1466abc0a331SBarry Smith   if (!baij->getrowactive) {
146729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1468acdf5bf4SSatish Balay   }
1469acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1471acdf5bf4SSatish Balay }
1472acdf5bf4SSatish Balay 
14734a2ae208SSatish Balay #undef __FUNCT__
14744a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1475dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
147658667388SSatish Balay {
147758667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1478dfbe8321SBarry Smith   PetscErrorCode ierr;
1479d64ed03dSBarry Smith 
1480d64ed03dSBarry Smith   PetscFunctionBegin;
148158667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
148258667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14833a40ed3dSBarry Smith   PetscFunctionReturn(0);
148458667388SSatish Balay }
14850ac07820SSatish Balay 
14864a2ae208SSatish Balay #undef __FUNCT__
14874a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1488dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14890ac07820SSatish Balay {
14904e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
14914e220ebcSLois Curfman McInnes   Mat            A = a->A,B = a->B;
1492dfbe8321SBarry Smith   PetscErrorCode ierr;
1493329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
14940ac07820SSatish Balay 
1495d64ed03dSBarry Smith   PetscFunctionBegin;
1496899cda47SBarry Smith   info->block_size     = (PetscReal)matin->rmap.bs;
14974e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14980e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1499de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
15004e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
15010e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1502de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
15030ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15044e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15054e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15064e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15074e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15084e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15090ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
15107adad957SLisandro Dalcin     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
15114e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15124e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15134e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15144e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15154e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15160ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
15177adad957SLisandro Dalcin     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
15184e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15194e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15204e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15214e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15224e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1523d41123aaSBarry Smith   } else {
152477431f27SBarry Smith     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
15250ac07820SSatish Balay   }
1526899cda47SBarry Smith   info->rows_global       = (PetscReal)A->rmap.N;
1527899cda47SBarry Smith   info->columns_global    = (PetscReal)A->cmap.N;
1528899cda47SBarry Smith   info->rows_local        = (PetscReal)A->rmap.N;
1529899cda47SBarry Smith   info->columns_local     = (PetscReal)A->cmap.N;
15304e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15314e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15324e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15333a40ed3dSBarry Smith   PetscFunctionReturn(0);
15340ac07820SSatish Balay }
15350ac07820SSatish Balay 
15364a2ae208SSatish Balay #undef __FUNCT__
15374a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
15384e0d8c25SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
153958667388SSatish Balay {
154058667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1541dfbe8321SBarry Smith   PetscErrorCode ierr;
154258667388SSatish Balay 
1543d64ed03dSBarry Smith   PetscFunctionBegin;
154412c028f9SKris Buschelman   switch (op) {
1545512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
154612c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
154712c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
154812c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
15494e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15504e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
155112c028f9SKris Buschelman     break;
155212c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15534e0d8c25SBarry Smith     a->roworiented = flg;
15544e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15554e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
155612c028f9SKris Buschelman     break;
15574e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
1558290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
155912c028f9SKris Buschelman     break;
156012c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15614e0d8c25SBarry Smith     a->donotstash = flg;
156212c028f9SKris Buschelman     break;
156312c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15644e0d8c25SBarry Smith     a->ht_flag = flg;
156512c028f9SKris Buschelman     break;
156677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
156777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15682188ac68SBarry Smith   case MAT_HERMITIAN:
15692188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
15704e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
157177e54ba9SKris Buschelman     break;
157212c028f9SKris Buschelman   default:
1573ad86a440SBarry Smith     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1574d64ed03dSBarry Smith   }
15753a40ed3dSBarry Smith   PetscFunctionReturn(0);
157658667388SSatish Balay }
157758667388SSatish Balay 
15784a2ae208SSatish Balay #undef __FUNCT__
15794a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1580dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15810ac07820SSatish Balay {
15820ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
15830ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
15840ac07820SSatish Balay   Mat            B;
1585dfbe8321SBarry Smith   PetscErrorCode ierr;
1586899cda47SBarry Smith   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1587899cda47SBarry Smith   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
15883eda8832SBarry Smith   MatScalar      *a;
15890ac07820SSatish Balay 
1590d64ed03dSBarry Smith   PetscFunctionBegin;
159129bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
15927adad957SLisandro Dalcin   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1593899cda47SBarry Smith   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
15947adad957SLisandro Dalcin   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1595899cda47SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
15960ac07820SSatish Balay 
15970ac07820SSatish Balay   /* copy over the A part */
15980ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15990ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1600b24ad042SBarry Smith   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
16010ac07820SSatish Balay 
16020ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1603899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
16040ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16050ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1606899cda47SBarry Smith       col = (baij->cstartbs+aj[j])*bs;
16070ac07820SSatish Balay       for (k=0; k<bs; k++) {
160893fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16090ac07820SSatish Balay         col++; a += bs;
16100ac07820SSatish Balay       }
16110ac07820SSatish Balay     }
16120ac07820SSatish Balay   }
16130ac07820SSatish Balay   /* copy over the B part */
16140ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16150ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16160ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1617899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
16180ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16190ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16200ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16210ac07820SSatish Balay       for (k=0; k<bs; k++) {
162293fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16230ac07820SSatish Balay         col++; a += bs;
16240ac07820SSatish Balay       }
16250ac07820SSatish Balay     }
16260ac07820SSatish Balay   }
1627606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16280ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16290ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16300ac07820SSatish Balay 
16317c922b88SBarry Smith   if (matout) {
16320ac07820SSatish Balay     *matout = B;
16330ac07820SSatish Balay   } else {
1634273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
16350ac07820SSatish Balay   }
16363a40ed3dSBarry Smith   PetscFunctionReturn(0);
16370ac07820SSatish Balay }
16380e95ebc0SSatish Balay 
16394a2ae208SSatish Balay #undef __FUNCT__
16404a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1641dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16420e95ebc0SSatish Balay {
164336c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
164436c4a09eSSatish Balay   Mat            a = baij->A,b = baij->B;
1645dfbe8321SBarry Smith   PetscErrorCode ierr;
1646b24ad042SBarry Smith   PetscInt       s1,s2,s3;
16470e95ebc0SSatish Balay 
1648d64ed03dSBarry Smith   PetscFunctionBegin;
164936c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
165036c4a09eSSatish Balay   if (rr) {
165136c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
165229bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
165336c4a09eSSatish Balay     /* Overlap communication with computation. */
1654ca9f406cSSatish Balay     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
165536c4a09eSSatish Balay   }
16560e95ebc0SSatish Balay   if (ll) {
16570e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
165829bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1659a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16600e95ebc0SSatish Balay   }
166136c4a09eSSatish Balay   /* scale  the diagonal block */
166236c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
166336c4a09eSSatish Balay 
166436c4a09eSSatish Balay   if (rr) {
166536c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1666ca9f406cSSatish Balay     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
166836c4a09eSSatish Balay   }
166936c4a09eSSatish Balay 
16703a40ed3dSBarry Smith   PetscFunctionReturn(0);
16710e95ebc0SSatish Balay }
16720e95ebc0SSatish Balay 
16734a2ae208SSatish Balay #undef __FUNCT__
16744a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1675f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
16760ac07820SSatish Balay {
16770ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16786849ba73SBarry Smith   PetscErrorCode ierr;
1679b24ad042SBarry Smith   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1680899cda47SBarry Smith   PetscInt       i,*owners = A->rmap.range;
1681b24ad042SBarry Smith   PetscInt       *nprocs,j,idx,nsends,row;
1682b24ad042SBarry Smith   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
16837adad957SLisandro Dalcin   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1684357c27ecSBarry Smith   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
16857adad957SLisandro Dalcin   MPI_Comm       comm = ((PetscObject)A)->comm;
16860ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16870ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16886543fbbaSBarry Smith #if defined(PETSC_DEBUG)
16896543fbbaSBarry Smith   PetscTruth     found = PETSC_FALSE;
16906543fbbaSBarry Smith #endif
16910ac07820SSatish Balay 
1692d64ed03dSBarry Smith   PetscFunctionBegin;
16930ac07820SSatish Balay   /*  first count number of contributors to each processor */
1694b24ad042SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1695b24ad042SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1696b24ad042SBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
16976543fbbaSBarry Smith   j = 0;
16980ac07820SSatish Balay   for (i=0; i<N; i++) {
16996543fbbaSBarry Smith     if (lastidx > (idx = rows[i])) j = 0;
17006543fbbaSBarry Smith     lastidx = idx;
17016543fbbaSBarry Smith     for (; j<size; j++) {
1702357c27ecSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
17036543fbbaSBarry Smith         nprocs[2*j]++;
17046543fbbaSBarry Smith         nprocs[2*j+1] = 1;
17056543fbbaSBarry Smith         owner[i] = j;
17066543fbbaSBarry Smith #if defined(PETSC_DEBUG)
17076543fbbaSBarry Smith         found = PETSC_TRUE;
17086543fbbaSBarry Smith #endif
17096543fbbaSBarry Smith         break;
17100ac07820SSatish Balay       }
17110ac07820SSatish Balay     }
17126543fbbaSBarry Smith #if defined(PETSC_DEBUG)
171329bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
17146543fbbaSBarry Smith     found = PETSC_FALSE;
17156543fbbaSBarry Smith #endif
17160ac07820SSatish Balay   }
1717c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
17180ac07820SSatish Balay 
17190ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1720c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
17210ac07820SSatish Balay 
17220ac07820SSatish Balay   /* post receives:   */
1723b24ad042SBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1724b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
17250ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1726b24ad042SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17270ac07820SSatish Balay   }
17280ac07820SSatish Balay 
17290ac07820SSatish Balay   /* do sends:
17300ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17310ac07820SSatish Balay      the ith processor
17320ac07820SSatish Balay   */
1733b24ad042SBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1734b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1735b24ad042SBarry Smith   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
17360ac07820SSatish Balay   starts[0]  = 0;
1737c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17380ac07820SSatish Balay   for (i=0; i<N; i++) {
17390ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17400ac07820SSatish Balay   }
17410ac07820SSatish Balay 
17420ac07820SSatish Balay   starts[0] = 0;
1743c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17440ac07820SSatish Balay   count = 0;
17450ac07820SSatish Balay   for (i=0; i<size; i++) {
1746c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1747b24ad042SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17480ac07820SSatish Balay     }
17490ac07820SSatish Balay   }
1750606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17510ac07820SSatish Balay 
1752357c27ecSBarry Smith   base = owners[rank];
17530ac07820SSatish Balay 
17540ac07820SSatish Balay   /*  wait on receives */
1755b24ad042SBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
17560ac07820SSatish Balay   source = lens + nrecvs;
17570ac07820SSatish Balay   count  = nrecvs; slen = 0;
17580ac07820SSatish Balay   while (count) {
1759ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17600ac07820SSatish Balay     /* unpack receives into our local space */
1761b24ad042SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
17620ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17630ac07820SSatish Balay     lens[imdex]    = n;
17640ac07820SSatish Balay     slen          += n;
17650ac07820SSatish Balay     count--;
17660ac07820SSatish Balay   }
1767606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17680ac07820SSatish Balay 
17690ac07820SSatish Balay   /* move the data into the send scatter */
1770b24ad042SBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
17710ac07820SSatish Balay   count = 0;
17720ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17730ac07820SSatish Balay     values = rvalues + i*nmax;
17740ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17750ac07820SSatish Balay       lrows[count++] = values[j] - base;
17760ac07820SSatish Balay     }
17770ac07820SSatish Balay   }
1778606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1779606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1780606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1781606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17820ac07820SSatish Balay 
17830ac07820SSatish Balay   /* actually zap the local rows */
178472dacd9aSBarry Smith   /*
178572dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1786a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
178772dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
178872dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
178972dacd9aSBarry Smith 
1790f4df32b1SMatthew Knepley        Contributed by: Matthew Knepley
179172dacd9aSBarry Smith   */
17929c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1793f4df32b1SMatthew Knepley   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1794899cda47SBarry Smith   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1795f4df32b1SMatthew Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1796f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
1797f4df32b1SMatthew Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1798fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
179929bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1800512a5fc5SBarry Smith MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18016525c446SSatish Balay     }
1802a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1803a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
1804f4df32b1SMatthew Knepley       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1805a07cd24cSSatish Balay     }
1806a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1807a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
18089c957beeSSatish Balay   } else {
1809f4df32b1SMatthew Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1810a07cd24cSSatish Balay   }
18119c957beeSSatish Balay 
1812606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1813a07cd24cSSatish Balay 
18140ac07820SSatish Balay   /* wait on sends */
18150ac07820SSatish Balay   if (nsends) {
181682502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1817ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1818606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
18190ac07820SSatish Balay   }
1820606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1821606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18220ac07820SSatish Balay 
18233a40ed3dSBarry Smith   PetscFunctionReturn(0);
18240ac07820SSatish Balay }
182572dacd9aSBarry Smith 
18264a2ae208SSatish Balay #undef __FUNCT__
18274a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1828dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1829bb5a7306SBarry Smith {
1830bb5a7306SBarry Smith   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1831dfbe8321SBarry Smith   PetscErrorCode ierr;
1832d64ed03dSBarry Smith 
1833d64ed03dSBarry Smith   PetscFunctionBegin;
1834bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18353a40ed3dSBarry Smith   PetscFunctionReturn(0);
1836bb5a7306SBarry Smith }
1837bb5a7306SBarry Smith 
18386849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18390ac07820SSatish Balay 
18404a2ae208SSatish Balay #undef __FUNCT__
18414a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
1842dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18437fc3c18eSBarry Smith {
18447fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18457fc3c18eSBarry Smith   Mat            a,b,c,d;
18467fc3c18eSBarry Smith   PetscTruth     flg;
1847dfbe8321SBarry Smith   PetscErrorCode ierr;
18487fc3c18eSBarry Smith 
18497fc3c18eSBarry Smith   PetscFunctionBegin;
18507fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18517fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18527fc3c18eSBarry Smith 
18537fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1854abc0a331SBarry Smith   if (flg) {
18557fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18567fc3c18eSBarry Smith   }
18577adad957SLisandro Dalcin   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
18587fc3c18eSBarry Smith   PetscFunctionReturn(0);
18597fc3c18eSBarry Smith }
18607fc3c18eSBarry Smith 
18613c896bc6SHong Zhang #undef __FUNCT__
18623c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ"
18633c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
18643c896bc6SHong Zhang {
18653c896bc6SHong Zhang   PetscErrorCode ierr;
18663c896bc6SHong Zhang   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
18673c896bc6SHong Zhang   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
18683c896bc6SHong Zhang 
18693c896bc6SHong Zhang   PetscFunctionBegin;
18703c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
18713c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
18723c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
18733c896bc6SHong Zhang   } else {
18743c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
18753c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
18763c896bc6SHong Zhang   }
18773c896bc6SHong Zhang   PetscFunctionReturn(0);
18783c896bc6SHong Zhang }
1879273d9f13SBarry Smith 
18804a2ae208SSatish Balay #undef __FUNCT__
18814a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1882dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1883273d9f13SBarry Smith {
1884dfbe8321SBarry Smith   PetscErrorCode ierr;
1885273d9f13SBarry Smith 
1886273d9f13SBarry Smith   PetscFunctionBegin;
18877edd0491SSatish Balay   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1888273d9f13SBarry Smith   PetscFunctionReturn(0);
1889273d9f13SBarry Smith }
1890273d9f13SBarry Smith 
18914fe895cdSHong Zhang #include "petscblaslapack.h"
18924fe895cdSHong Zhang #undef __FUNCT__
18934fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ"
18944fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
18954fe895cdSHong Zhang {
18964fe895cdSHong Zhang   PetscErrorCode ierr;
18974fe895cdSHong Zhang   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
18984fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
18994fe895cdSHong Zhang   Mat_SeqBAIJ    *x,*y;
19004fe895cdSHong Zhang 
19014fe895cdSHong Zhang   PetscFunctionBegin;
19024fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
19034fe895cdSHong Zhang     PetscScalar alpha = a;
19044fe895cdSHong Zhang     x = (Mat_SeqBAIJ *)xx->A->data;
19054fe895cdSHong Zhang     y = (Mat_SeqBAIJ *)yy->A->data;
19064fe895cdSHong Zhang     bnz = (PetscBLASInt)x->nz;
19074fe895cdSHong Zhang     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
19084fe895cdSHong Zhang     x = (Mat_SeqBAIJ *)xx->B->data;
19094fe895cdSHong Zhang     y = (Mat_SeqBAIJ *)yy->B->data;
19104fe895cdSHong Zhang     bnz = (PetscBLASInt)x->nz;
19114fe895cdSHong Zhang     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
19124fe895cdSHong Zhang   } else {
19134fe895cdSHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
19144fe895cdSHong Zhang   }
19154fe895cdSHong Zhang   PetscFunctionReturn(0);
19164fe895cdSHong Zhang }
19174fe895cdSHong Zhang 
191899cafbc1SBarry Smith #undef __FUNCT__
191999cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ"
192099cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
192199cafbc1SBarry Smith {
192299cafbc1SBarry Smith   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
192399cafbc1SBarry Smith   PetscErrorCode ierr;
192499cafbc1SBarry Smith 
192599cafbc1SBarry Smith   PetscFunctionBegin;
192699cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
192799cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
192899cafbc1SBarry Smith   PetscFunctionReturn(0);
192999cafbc1SBarry Smith }
193099cafbc1SBarry Smith 
193199cafbc1SBarry Smith #undef __FUNCT__
193299cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
193399cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
193499cafbc1SBarry Smith {
193599cafbc1SBarry Smith   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
193699cafbc1SBarry Smith   PetscErrorCode ierr;
193799cafbc1SBarry Smith 
193899cafbc1SBarry Smith   PetscFunctionBegin;
193999cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
194099cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
194199cafbc1SBarry Smith   PetscFunctionReturn(0);
194299cafbc1SBarry Smith }
194399cafbc1SBarry Smith 
194479bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1945cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1946cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1947cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1948cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1949cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
195097304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
19517c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
19527c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1953cc2dc46cSBarry Smith        0,
1954cc2dc46cSBarry Smith        0,
1955cc2dc46cSBarry Smith        0,
195697304618SKris Buschelman /*10*/ 0,
1957cc2dc46cSBarry Smith        0,
1958cc2dc46cSBarry Smith        0,
1959cc2dc46cSBarry Smith        0,
1960cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
196197304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
19627fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
1963cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
1964cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
1965cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
196697304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
1967cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
1968cc2dc46cSBarry Smith        0,
1969cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
1970cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
197197304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
1972cc2dc46cSBarry Smith        0,
1973cc2dc46cSBarry Smith        0,
1974cc2dc46cSBarry Smith        0,
1975cc2dc46cSBarry Smith        0,
197697304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
1977273d9f13SBarry Smith        0,
1978cc2dc46cSBarry Smith        0,
1979cc2dc46cSBarry Smith        0,
1980cc2dc46cSBarry Smith        0,
198197304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
1982cc2dc46cSBarry Smith        0,
1983cc2dc46cSBarry Smith        0,
1984cc2dc46cSBarry Smith        0,
1985cc2dc46cSBarry Smith        0,
19864fe895cdSHong Zhang /*40*/ MatAXPY_MPIBAIJ,
1987cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
1988cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
1989cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
19903c896bc6SHong Zhang        MatCopy_MPIBAIJ,
19918c07d4e3SBarry Smith /*45*/ 0,
1992cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
1993cc2dc46cSBarry Smith        0,
1994cc2dc46cSBarry Smith        0,
1995cc2dc46cSBarry Smith        0,
1996521d7252SBarry Smith /*50*/ 0,
1997cc2dc46cSBarry Smith        0,
1998cc2dc46cSBarry Smith        0,
1999cc2dc46cSBarry Smith        0,
2000cc2dc46cSBarry Smith        0,
200197304618SKris Buschelman /*55*/ 0,
2002cc2dc46cSBarry Smith        0,
2003cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
2004cc2dc46cSBarry Smith        0,
2005cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
200697304618SKris Buschelman /*60*/ 0,
2007f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
2008f14a1c24SBarry Smith        MatView_MPIBAIJ,
2009357abbc8SBarry Smith        0,
20107843d17aSBarry Smith        0,
201197304618SKris Buschelman /*65*/ 0,
20127843d17aSBarry Smith        0,
20137843d17aSBarry Smith        0,
20147843d17aSBarry Smith        0,
20157843d17aSBarry Smith        0,
2016985db425SBarry Smith /*70*/ MatGetRowMaxAbs_MPIBAIJ,
20177843d17aSBarry Smith        0,
201897304618SKris Buschelman        0,
201997304618SKris Buschelman        0,
202097304618SKris Buschelman        0,
202197304618SKris Buschelman /*75*/ 0,
202297304618SKris Buschelman        0,
202397304618SKris Buschelman        0,
202497304618SKris Buschelman        0,
202597304618SKris Buschelman        0,
202697304618SKris Buschelman /*80*/ 0,
202797304618SKris Buschelman        0,
202897304618SKris Buschelman        0,
202997304618SKris Buschelman        0,
2030865e5f61SKris Buschelman        MatLoad_MPIBAIJ,
2031865e5f61SKris Buschelman /*85*/ 0,
2032865e5f61SKris Buschelman        0,
2033865e5f61SKris Buschelman        0,
2034865e5f61SKris Buschelman        0,
2035865e5f61SKris Buschelman        0,
2036865e5f61SKris Buschelman /*90*/ 0,
2037865e5f61SKris Buschelman        0,
2038865e5f61SKris Buschelman        0,
2039865e5f61SKris Buschelman        0,
2040865e5f61SKris Buschelman        0,
2041865e5f61SKris Buschelman /*95*/ 0,
2042865e5f61SKris Buschelman        0,
2043865e5f61SKris Buschelman        0,
204499cafbc1SBarry Smith        0,
204599cafbc1SBarry Smith        0,
204699cafbc1SBarry Smith /*100*/0,
204799cafbc1SBarry Smith        0,
204899cafbc1SBarry Smith        0,
204999cafbc1SBarry Smith        0,
205099cafbc1SBarry Smith        0,
205199cafbc1SBarry Smith /*105*/0,
205299cafbc1SBarry Smith        MatRealPart_MPIBAIJ,
205399cafbc1SBarry Smith        MatImaginaryPart_MPIBAIJ};
205479bdfe76SSatish Balay 
20555ef9f2a5SBarry Smith 
2056e18c124aSSatish Balay EXTERN_C_BEGIN
20574a2ae208SSatish Balay #undef __FUNCT__
20584a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2059be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
20605ef9f2a5SBarry Smith {
20615ef9f2a5SBarry Smith   PetscFunctionBegin;
20625ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
20635ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
20645ef9f2a5SBarry Smith   PetscFunctionReturn(0);
20655ef9f2a5SBarry Smith }
2066e18c124aSSatish Balay EXTERN_C_END
206779bdfe76SSatish Balay 
2068273d9f13SBarry Smith EXTERN_C_BEGIN
2069f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2070d94109b8SHong Zhang EXTERN_C_END
2071d94109b8SHong Zhang 
2072aac34f13SBarry Smith #undef __FUNCT__
2073aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2074b7940d39SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2075aac34f13SBarry Smith {
2076899cda47SBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2077899cda47SBarry Smith   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2078899cda47SBarry Smith   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2079aac34f13SBarry Smith   const PetscInt *JJ;
2080aac34f13SBarry Smith   PetscScalar    *values;
2081aac34f13SBarry Smith   PetscErrorCode ierr;
2082aac34f13SBarry Smith 
2083aac34f13SBarry Smith   PetscFunctionBegin;
2084aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2085b7940d39SSatish Balay   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2086aac34f13SBarry Smith #endif
2087aac34f13SBarry Smith   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2088aac34f13SBarry Smith   o_nnz = d_nnz + m;
2089aac34f13SBarry Smith 
2090aac34f13SBarry Smith   for (i=0; i<m; i++) {
2091b7940d39SSatish Balay     nnz     = Ii[i+1]- Ii[i];
2092b7940d39SSatish Balay     JJ      = J + Ii[i];
2093aac34f13SBarry Smith     nnz_max = PetscMax(nnz_max,nnz);
2094aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2095aac34f13SBarry Smith     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2096aac34f13SBarry Smith #endif
2097aac34f13SBarry Smith     for (j=0; j<nnz; j++) {
2098aac34f13SBarry Smith       if (*JJ >= cstart) break;
2099aac34f13SBarry Smith       JJ++;
2100aac34f13SBarry Smith     }
2101aac34f13SBarry Smith     d = 0;
2102aac34f13SBarry Smith     for (; j<nnz; j++) {
2103aac34f13SBarry Smith       if (*JJ++ >= cend) break;
2104aac34f13SBarry Smith       d++;
2105aac34f13SBarry Smith     }
2106aac34f13SBarry Smith     d_nnz[i] = d;
2107aac34f13SBarry Smith     o_nnz[i] = nnz - d;
2108aac34f13SBarry Smith   }
2109aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2110aac34f13SBarry Smith   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2111aac34f13SBarry Smith 
2112aac34f13SBarry Smith   if (v) values = (PetscScalar*)v;
2113aac34f13SBarry Smith   else {
2114aac34f13SBarry Smith     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2115aac34f13SBarry Smith     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2116aac34f13SBarry Smith   }
2117aac34f13SBarry Smith 
2118aac34f13SBarry Smith   for (i=0; i<m; i++) {
2119aac34f13SBarry Smith     ii   = i + rstart;
2120b7940d39SSatish Balay     nnz  = Ii[i+1]- Ii[i];
2121b7940d39SSatish Balay     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2122aac34f13SBarry Smith   }
2123aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2124aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2125aac34f13SBarry Smith 
2126aac34f13SBarry Smith   if (!v) {
2127aac34f13SBarry Smith     ierr = PetscFree(values);CHKERRQ(ierr);
2128aac34f13SBarry Smith   }
2129aac34f13SBarry Smith   PetscFunctionReturn(0);
2130aac34f13SBarry Smith }
2131aac34f13SBarry Smith 
2132aac34f13SBarry Smith #undef __FUNCT__
2133aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2134aac34f13SBarry Smith /*@C
2135aac34f13SBarry Smith    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2136aac34f13SBarry Smith    (the default parallel PETSc format).
2137aac34f13SBarry Smith 
2138aac34f13SBarry Smith    Collective on MPI_Comm
2139aac34f13SBarry Smith 
2140aac34f13SBarry Smith    Input Parameters:
2141aac34f13SBarry Smith +  A - the matrix
2142aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2143aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2144aac34f13SBarry Smith -  v - optional values in the matrix
2145aac34f13SBarry Smith 
2146aac34f13SBarry Smith    Level: developer
2147aac34f13SBarry Smith 
2148aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
2149aac34f13SBarry Smith 
2150aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2151aac34f13SBarry Smith @*/
2152be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2153aac34f13SBarry Smith {
2154aac34f13SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2155aac34f13SBarry Smith 
2156aac34f13SBarry Smith   PetscFunctionBegin;
2157aac34f13SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2158aac34f13SBarry Smith   if (f) {
2159aac34f13SBarry Smith     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2160aac34f13SBarry Smith   }
2161aac34f13SBarry Smith   PetscFunctionReturn(0);
2162aac34f13SBarry Smith }
2163aac34f13SBarry Smith 
2164d94109b8SHong Zhang EXTERN_C_BEGIN
21654a2ae208SSatish Balay #undef __FUNCT__
2166a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2167be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2168a23d5eceSKris Buschelman {
2169a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2170dfbe8321SBarry Smith   PetscErrorCode ierr;
2171b24ad042SBarry Smith   PetscInt       i;
2172a23d5eceSKris Buschelman 
2173a23d5eceSKris Buschelman   PetscFunctionBegin;
2174a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
21757adad957SLisandro Dalcin   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
21768c07d4e3SBarry Smith     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
21778c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2178a23d5eceSKris Buschelman 
2179a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2180a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2181a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
218277431f27SBarry Smith   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
218377431f27SBarry Smith   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2184899cda47SBarry Smith 
2185899cda47SBarry Smith   B->rmap.bs  = bs;
2186899cda47SBarry Smith   B->cmap.bs  = bs;
21876148ca0dSBarry Smith   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
21886148ca0dSBarry Smith   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2189899cda47SBarry Smith 
2190a23d5eceSKris Buschelman   if (d_nnz) {
2191899cda47SBarry Smith     for (i=0; i<B->rmap.n/bs; i++) {
219277431f27SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2193a23d5eceSKris Buschelman     }
2194a23d5eceSKris Buschelman   }
2195a23d5eceSKris Buschelman   if (o_nnz) {
2196899cda47SBarry Smith     for (i=0; i<B->rmap.n/bs; i++) {
219777431f27SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2198a23d5eceSKris Buschelman     }
2199a23d5eceSKris Buschelman   }
2200a23d5eceSKris Buschelman 
2201a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2202a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2203899cda47SBarry Smith   b->mbs = B->rmap.n/bs;
2204899cda47SBarry Smith   b->nbs = B->cmap.n/bs;
2205899cda47SBarry Smith   b->Mbs = B->rmap.N/bs;
2206899cda47SBarry Smith   b->Nbs = B->cmap.N/bs;
2207a23d5eceSKris Buschelman 
2208a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2209899cda47SBarry Smith     b->rangebs[i] = B->rmap.range[i]/bs;
2210a23d5eceSKris Buschelman   }
2211899cda47SBarry Smith   b->rstartbs = B->rmap.rstart/bs;
2212899cda47SBarry Smith   b->rendbs   = B->rmap.rend/bs;
2213899cda47SBarry Smith   b->cstartbs = B->cmap.rstart/bs;
2214899cda47SBarry Smith   b->cendbs   = B->cmap.rend/bs;
2215a23d5eceSKris Buschelman 
2216f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2217899cda47SBarry Smith   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
22189c097c71SKris Buschelman   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2219c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
222052e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2221f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2222899cda47SBarry Smith   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
22239c097c71SKris Buschelman   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2224c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
222552e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2226c60e587dSKris Buschelman 
22277adad957SLisandro Dalcin   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2228a23d5eceSKris Buschelman 
2229a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2230a23d5eceSKris Buschelman }
2231a23d5eceSKris Buschelman EXTERN_C_END
2232a23d5eceSKris Buschelman 
2233a23d5eceSKris Buschelman EXTERN_C_BEGIN
2234be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2235be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
223692b32695SKris Buschelman EXTERN_C_END
22375bf65638SKris Buschelman 
22380bad9183SKris Buschelman /*MC
2239fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
22400bad9183SKris Buschelman 
22410bad9183SKris Buschelman    Options Database Keys:
22428c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
22438c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix
22448c07d4e3SBarry Smith - -mat_use_hash_table <fact>
22450bad9183SKris Buschelman 
22460bad9183SKris Buschelman   Level: beginner
22470bad9183SKris Buschelman 
22480bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
22490bad9183SKris Buschelman M*/
22500bad9183SKris Buschelman 
225192b32695SKris Buschelman EXTERN_C_BEGIN
2252a23d5eceSKris Buschelman #undef __FUNCT__
22534a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2254be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2255273d9f13SBarry Smith {
2256273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
2257dfbe8321SBarry Smith   PetscErrorCode ierr;
2258273d9f13SBarry Smith   PetscTruth     flg;
2259273d9f13SBarry Smith 
2260273d9f13SBarry Smith   PetscFunctionBegin;
226138f2d2fdSLisandro Dalcin   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
226282502324SSatish Balay   B->data = (void*)b;
226382502324SSatish Balay 
2264085a36d4SBarry Smith 
2265273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2266273d9f13SBarry Smith   B->mapping    = 0;
2267273d9f13SBarry Smith   B->factor     = 0;
2268273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2269273d9f13SBarry Smith 
2270273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
22717adad957SLisandro Dalcin   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
22727adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2273273d9f13SBarry Smith 
2274273d9f13SBarry Smith   /* build local table of row and column ownerships */
2275899cda47SBarry Smith   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2276273d9f13SBarry Smith 
2277273d9f13SBarry Smith   /* build cache for off array entries formed */
22787adad957SLisandro Dalcin   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2279273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2280273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2281273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2282273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2283273d9f13SBarry Smith 
2284cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2285273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
228664a35ccbSBarry Smith   b->setvalueslen     = 0;
2287273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2288273d9f13SBarry Smith #endif
2289273d9f13SBarry Smith 
2290273d9f13SBarry Smith   /* stuff used in block assembly */
2291273d9f13SBarry Smith   b->barray       = 0;
2292273d9f13SBarry Smith 
2293273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2294273d9f13SBarry Smith   b->lvec         = 0;
2295273d9f13SBarry Smith   b->Mvctx        = 0;
2296273d9f13SBarry Smith 
2297273d9f13SBarry Smith   /* stuff for MatGetRow() */
2298273d9f13SBarry Smith   b->rowindices   = 0;
2299273d9f13SBarry Smith   b->rowvalues    = 0;
2300273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2301273d9f13SBarry Smith 
2302273d9f13SBarry Smith   /* hash table stuff */
2303273d9f13SBarry Smith   b->ht           = 0;
2304273d9f13SBarry Smith   b->hd           = 0;
2305273d9f13SBarry Smith   b->ht_size      = 0;
2306273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2307273d9f13SBarry Smith   b->ht_fact      = 0;
2308273d9f13SBarry Smith   b->ht_total_ct  = 0;
2309273d9f13SBarry Smith   b->ht_insert_ct = 0;
2310273d9f13SBarry Smith 
23117adad957SLisandro Dalcin   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
23128c07d4e3SBarry Smith     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2313273d9f13SBarry Smith     if (flg) {
2314f6275e2eSBarry Smith       PetscReal fact = 1.39;
23154e0d8c25SBarry Smith       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
23168c07d4e3SBarry Smith       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2317273d9f13SBarry Smith       if (fact <= 1.0) fact = 1.39;
2318273d9f13SBarry Smith       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
23191e2582c4SBarry Smith       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2320273d9f13SBarry Smith     }
23218c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
23228c07d4e3SBarry Smith 
2323273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2324273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2325273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2326273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2327273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2328273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2329273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2330273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2331273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2332a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2333a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2334a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2335aac34f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
233644ec7894SLisandro Dalcin 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2337aac34f13SBarry Smith 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
233892b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
233992b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
234092b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
23415bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
23425bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
23435bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
234417667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2345273d9f13SBarry Smith   PetscFunctionReturn(0);
2346273d9f13SBarry Smith }
2347273d9f13SBarry Smith EXTERN_C_END
2348273d9f13SBarry Smith 
2349209238afSKris Buschelman /*MC
2350002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2351209238afSKris Buschelman 
2352209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2353209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2354209238afSKris Buschelman 
2355209238afSKris Buschelman    Options Database Keys:
2356209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2357209238afSKris Buschelman 
2358209238afSKris Buschelman   Level: beginner
2359209238afSKris Buschelman 
2360aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2361209238afSKris Buschelman M*/
2362209238afSKris Buschelman 
2363209238afSKris Buschelman EXTERN_C_BEGIN
2364209238afSKris Buschelman #undef __FUNCT__
2365209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2366be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2367dfbe8321SBarry Smith {
23686849ba73SBarry Smith   PetscErrorCode ierr;
2369b24ad042SBarry Smith   PetscMPIInt    size;
2370209238afSKris Buschelman 
2371209238afSKris Buschelman   PetscFunctionBegin;
23727adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2373209238afSKris Buschelman   if (size == 1) {
2374209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2375209238afSKris Buschelman   } else {
2376209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2377209238afSKris Buschelman   }
2378209238afSKris Buschelman   PetscFunctionReturn(0);
2379209238afSKris Buschelman }
2380209238afSKris Buschelman EXTERN_C_END
2381209238afSKris Buschelman 
23824a2ae208SSatish Balay #undef __FUNCT__
23834a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2384273d9f13SBarry Smith /*@C
2385aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2386273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2387273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2388273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2389273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2390273d9f13SBarry Smith 
2391273d9f13SBarry Smith    Collective on Mat
2392273d9f13SBarry Smith 
2393273d9f13SBarry Smith    Input Parameters:
2394273d9f13SBarry Smith +  A - the matrix
2395273d9f13SBarry Smith .  bs   - size of blockk
2396273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2397273d9f13SBarry Smith            submatrix  (same for all local rows)
2398273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2399273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2400273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2401273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2402273d9f13SBarry Smith            submatrix (same for all local rows).
2403273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2404273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2405273d9f13SBarry Smith            each block row) or PETSC_NULL.
2406273d9f13SBarry Smith 
240749a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
2408273d9f13SBarry Smith 
2409273d9f13SBarry Smith    Options Database Keys:
24108c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
24118c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
2412273d9f13SBarry Smith 
2413273d9f13SBarry Smith    Notes:
2414273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2415273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2416273d9f13SBarry Smith 
2417273d9f13SBarry Smith    Storage Information:
2418273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2419273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2420273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2421273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2422273d9f13SBarry Smith 
2423273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2424273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2425273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2426273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2427273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2428273d9f13SBarry Smith 
2429273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2430273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2431273d9f13SBarry Smith 
2432273d9f13SBarry Smith .vb
2433273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2434273d9f13SBarry Smith           -------------------
2435273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2436273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2437273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2438273d9f13SBarry Smith           -------------------
2439273d9f13SBarry Smith .ve
2440273d9f13SBarry Smith 
2441273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2442273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2443273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2444273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2445273d9f13SBarry Smith 
2446273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2447273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2448273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2449273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2450273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2451273d9f13SBarry Smith    matrices.
2452273d9f13SBarry Smith 
2453*aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
2454*aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2455*aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
2456*aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
2457*aa95bbe8SBarry Smith 
2458273d9f13SBarry Smith    Level: intermediate
2459273d9f13SBarry Smith 
2460273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2461273d9f13SBarry Smith 
2462aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2463273d9f13SBarry Smith @*/
2464be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2465273d9f13SBarry Smith {
2466b24ad042SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2467273d9f13SBarry Smith 
2468273d9f13SBarry Smith   PetscFunctionBegin;
2469a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2470a23d5eceSKris Buschelman   if (f) {
2471a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2472273d9f13SBarry Smith   }
2473273d9f13SBarry Smith   PetscFunctionReturn(0);
2474273d9f13SBarry Smith }
2475273d9f13SBarry Smith 
24764a2ae208SSatish Balay #undef __FUNCT__
24774a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
247879bdfe76SSatish Balay /*@C
247979bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
248079bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
248179bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
248279bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
248379bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
248479bdfe76SSatish Balay 
2485db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2486db81eaa0SLois Curfman McInnes 
248779bdfe76SSatish Balay    Input Parameters:
2488db81eaa0SLois Curfman McInnes +  comm - MPI communicator
248979bdfe76SSatish Balay .  bs   - size of blockk
249079bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
249192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
249292e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
249392e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
249492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
249592e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2496be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2497be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
249847a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
249979bdfe76SSatish Balay            submatrix  (same for all local rows)
250047a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
250192e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2502db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
250347a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
250479bdfe76SSatish Balay            submatrix (same for all local rows).
250547a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
250692e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
250792e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
250879bdfe76SSatish Balay 
250979bdfe76SSatish Balay    Output Parameter:
251079bdfe76SSatish Balay .  A - the matrix
251179bdfe76SSatish Balay 
2512db81eaa0SLois Curfman McInnes    Options Database Keys:
25138c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
25148c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
25153ffaccefSLois Curfman McInnes 
2516175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2517175b88e8SBarry Smith    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2518175b88e8SBarry Smith    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2519175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2520175b88e8SBarry Smith 
2521b259b22eSLois Curfman McInnes    Notes:
252249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
252349a6f317SBarry Smith 
252447a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
252547a75d0bSBarry Smith 
252679bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
252779bdfe76SSatish Balay    (possibly both).
252879bdfe76SSatish Balay 
2529be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2530be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2531be79a94dSBarry Smith 
253279bdfe76SSatish Balay    Storage Information:
253379bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
253479bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
253579bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
253679bdfe76SSatish Balay    local matrix (a rectangular submatrix).
253779bdfe76SSatish Balay 
253879bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
253979bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
254079bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
254179bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
254279bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
254379bdfe76SSatish Balay 
254479bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
254579bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
254679bdfe76SSatish Balay 
2547db81eaa0SLois Curfman McInnes .vb
2548db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2549db81eaa0SLois Curfman McInnes           -------------------
2550db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2551db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2552db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2553db81eaa0SLois Curfman McInnes           -------------------
2554db81eaa0SLois Curfman McInnes .ve
255579bdfe76SSatish Balay 
255679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
255779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
255879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
255957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
256079bdfe76SSatish Balay 
2561d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2562d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
256379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
256492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
256592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
25666da5968aSLois Curfman McInnes    matrices.
256779bdfe76SSatish Balay 
2568027ccd11SLois Curfman McInnes    Level: intermediate
2569027ccd11SLois Curfman McInnes 
257092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
257179bdfe76SSatish Balay 
2572aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
257379bdfe76SSatish Balay @*/
2574be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
257579bdfe76SSatish Balay {
25766849ba73SBarry Smith   PetscErrorCode ierr;
2577b24ad042SBarry Smith   PetscMPIInt    size;
257879bdfe76SSatish Balay 
2579d64ed03dSBarry Smith   PetscFunctionBegin;
2580f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2581f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2582d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2583273d9f13SBarry Smith   if (size > 1) {
2584273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2585273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2586273d9f13SBarry Smith   } else {
2587273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2588273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
25893914022bSBarry Smith   }
25903a40ed3dSBarry Smith   PetscFunctionReturn(0);
259179bdfe76SSatish Balay }
2592026e39d0SSatish Balay 
25934a2ae208SSatish Balay #undef __FUNCT__
25944a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
25956849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
25960ac07820SSatish Balay {
25970ac07820SSatish Balay   Mat            mat;
25980ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2599dfbe8321SBarry Smith   PetscErrorCode ierr;
2600b24ad042SBarry Smith   PetscInt       len=0;
26010ac07820SSatish Balay 
2602d64ed03dSBarry Smith   PetscFunctionBegin;
26030ac07820SSatish Balay   *newmat       = 0;
26047adad957SLisandro Dalcin   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2605899cda47SBarry Smith   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
26067adad957SLisandro Dalcin   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
26071d5dac46SHong Zhang   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
26087fff6886SHong Zhang 
26094beb1cfeSHong Zhang   mat->factor       = matin->factor;
2610273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
26110ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
26127fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
26137fff6886SHong Zhang 
2614273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
2615899cda47SBarry Smith   mat->rmap.bs  = matin->rmap.bs;
26160ac07820SSatish Balay   a->bs2   = oldmat->bs2;
26170ac07820SSatish Balay   a->mbs   = oldmat->mbs;
26180ac07820SSatish Balay   a->nbs   = oldmat->nbs;
26190ac07820SSatish Balay   a->Mbs   = oldmat->Mbs;
26200ac07820SSatish Balay   a->Nbs   = oldmat->Nbs;
26210ac07820SSatish Balay 
26227adad957SLisandro Dalcin   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
26237adad957SLisandro Dalcin   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2624899cda47SBarry Smith 
26250ac07820SSatish Balay   a->size         = oldmat->size;
26260ac07820SSatish Balay   a->rank         = oldmat->rank;
2627aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2628aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2629aef5e8e0SSatish Balay   a->rowindices   = 0;
26300ac07820SSatish Balay   a->rowvalues    = 0;
26310ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
263230793edcSSatish Balay   a->barray       = 0;
2633899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2634899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2635899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2636899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
26370ac07820SSatish Balay 
2638133cdb44SSatish Balay   /* hash table stuff */
2639133cdb44SSatish Balay   a->ht           = 0;
2640133cdb44SSatish Balay   a->hd           = 0;
2641133cdb44SSatish Balay   a->ht_size      = 0;
2642133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
264325fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2644133cdb44SSatish Balay   a->ht_total_ct  = 0;
2645133cdb44SSatish Balay   a->ht_insert_ct = 0;
2646133cdb44SSatish Balay 
2647899cda47SBarry Smith   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
26487adad957SLisandro Dalcin   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
26497adad957SLisandro Dalcin   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
26500ac07820SSatish Balay   if (oldmat->colmap) {
2651aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
26520f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
265348e59246SSatish Balay #else
2654b24ad042SBarry Smith   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
265552e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2656b24ad042SBarry Smith   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
265748e59246SSatish Balay #endif
26580ac07820SSatish Balay   } else a->colmap = 0;
26594beb1cfeSHong Zhang 
26600ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2661b24ad042SBarry Smith     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
266252e6d16bSBarry Smith     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2663b24ad042SBarry Smith     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
26640ac07820SSatish Balay   } else a->garray = 0;
26650ac07820SSatish Balay 
26660ac07820SSatish Balay   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
266752e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
26680ac07820SSatish Balay   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
266952e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
26707fff6886SHong Zhang 
26712e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
267252e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
26732e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
267452e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
26757adad957SLisandro Dalcin   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
26760ac07820SSatish Balay   *newmat = mat;
26774beb1cfeSHong Zhang 
26783a40ed3dSBarry Smith   PetscFunctionReturn(0);
26790ac07820SSatish Balay }
268057b952d6SSatish Balay 
2681e090d566SSatish Balay #include "petscsys.h"
268257b952d6SSatish Balay 
26834a2ae208SSatish Balay #undef __FUNCT__
26844a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
2685f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
268657b952d6SSatish Balay {
268757b952d6SSatish Balay   Mat            A;
26886849ba73SBarry Smith   PetscErrorCode ierr;
2689b24ad042SBarry Smith   int            fd;
2690b24ad042SBarry Smith   PetscInt       i,nz,j,rstart,rend;
269187828ca2SBarry Smith   PetscScalar    *vals,*buf;
269257b952d6SSatish Balay   MPI_Comm       comm = ((PetscObject)viewer)->comm;
269357b952d6SSatish Balay   MPI_Status     status;
2694b24ad042SBarry Smith   PetscMPIInt    rank,size,maxnz;
2695b24ad042SBarry Smith   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2696910ba992SMatthew Knepley   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2697167e7480SBarry Smith   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2698dc231df0SBarry Smith   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2699910ba992SMatthew Knepley   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2700dc231df0SBarry Smith   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
270157b952d6SSatish Balay 
2702d64ed03dSBarry Smith   PetscFunctionBegin;
270377925062SSatish Balay   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
27048c07d4e3SBarry Smith     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
27058c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
270657b952d6SSatish Balay 
2707d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2708d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
270957b952d6SSatish Balay   if (!rank) {
2710b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2711e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2712552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
27136c5fab8fSBarry Smith   }
2714d64ed03dSBarry Smith 
2715b24ad042SBarry Smith   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
271657b952d6SSatish Balay   M = header[1]; N = header[2];
271757b952d6SSatish Balay 
271829bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
271957b952d6SSatish Balay 
272057b952d6SSatish Balay   /*
272157b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
272257b952d6SSatish Balay      divisible by the blocksize
272357b952d6SSatish Balay   */
272457b952d6SSatish Balay   Mbs        = M/bs;
2725dc231df0SBarry Smith   extra_rows = bs - M + bs*Mbs;
272657b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
272757b952d6SSatish Balay   else                  Mbs++;
272857b952d6SSatish Balay   if (extra_rows && !rank) {
27291e2582c4SBarry Smith     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
273057b952d6SSatish Balay   }
2731537820f0SBarry Smith 
273257b952d6SSatish Balay   /* determine ownership of all rows */
273357b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
273457b952d6SSatish Balay   m          = mbs*bs;
2735dc231df0SBarry Smith   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2736b24ad042SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2737167e7480SBarry Smith 
2738167e7480SBarry Smith   /* process 0 needs enough room for process with most rows */
2739167e7480SBarry Smith   if (!rank) {
2740167e7480SBarry Smith     mmax = rowners[1];
2741167e7480SBarry Smith     for (i=2; i<size; i++) {
2742167e7480SBarry Smith       mmax = PetscMax(mmax,rowners[i]);
2743167e7480SBarry Smith     }
2744ca02efcfSSatish Balay     mmax*=bs;
2745167e7480SBarry Smith   } else mmax = m;
2746167e7480SBarry Smith 
274757b952d6SSatish Balay   rowners[0] = 0;
2748cee3aa6bSSatish Balay   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2749cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
275057b952d6SSatish Balay   rstart = rowners[rank];
275157b952d6SSatish Balay   rend   = rowners[rank+1];
275257b952d6SSatish Balay 
275357b952d6SSatish Balay   /* distribute row lengths to all processors */
275419c38ff2SBarry Smith   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
275557b952d6SSatish Balay   if (!rank) {
2756dc231df0SBarry Smith     mend = m;
2757dc231df0SBarry Smith     if (size == 1) mend = mend - extra_rows;
2758dc231df0SBarry Smith     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2759dc231df0SBarry Smith     for (j=mend; j<m; j++) locrowlens[j] = 1;
2760dc231df0SBarry Smith     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2761b24ad042SBarry Smith     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2762b24ad042SBarry Smith     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2763dc231df0SBarry Smith     for (j=0; j<m; j++) {
2764dc231df0SBarry Smith       procsnz[0] += locrowlens[j];
2765dc231df0SBarry Smith     }
2766dc231df0SBarry Smith     for (i=1; i<size; i++) {
2767dc231df0SBarry Smith       mend = browners[i+1] - browners[i];
2768dc231df0SBarry Smith       if (i == size-1) mend = mend - extra_rows;
2769dc231df0SBarry Smith       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2770dc231df0SBarry Smith       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2771dc231df0SBarry Smith       /* calculate the number of nonzeros on each processor */
2772dc231df0SBarry Smith       for (j=0; j<browners[i+1]-browners[i]; j++) {
277357b952d6SSatish Balay         procsnz[i] += rowlengths[j];
277457b952d6SSatish Balay       }
2775dc231df0SBarry Smith       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277657b952d6SSatish Balay     }
2777606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2778dc231df0SBarry Smith   } else {
2779dc231df0SBarry Smith     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2780dc231df0SBarry Smith   }
278157b952d6SSatish Balay 
2782dc231df0SBarry Smith   if (!rank) {
278357b952d6SSatish Balay     /* determine max buffer needed and allocate it */
27848a8e0b3aSBarry Smith     maxnz = procsnz[0];
2785cdc0ba36SBarry Smith     for (i=1; i<size; i++) {
278657b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
278757b952d6SSatish Balay     }
2788b24ad042SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
278957b952d6SSatish Balay 
279057b952d6SSatish Balay     /* read in my part of the matrix column indices  */
279157b952d6SSatish Balay     nz     = procsnz[0];
279219c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
279357b952d6SSatish Balay     mycols = ibuf;
2794cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2795e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2796cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2797cee3aa6bSSatish Balay 
279857b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
279957b952d6SSatish Balay     for (i=1; i<size-1; i++) {
280057b952d6SSatish Balay       nz   = procsnz[i];
2801e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2802b24ad042SBarry Smith       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280357b952d6SSatish Balay     }
280457b952d6SSatish Balay     /* read in the stuff for the last proc */
280557b952d6SSatish Balay     if (size != 1) {
280657b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2807e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
280857b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2809b24ad042SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
281057b952d6SSatish Balay     }
2811606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2812d64ed03dSBarry Smith   } else {
281357b952d6SSatish Balay     /* determine buffer space needed for message */
281457b952d6SSatish Balay     nz = 0;
281557b952d6SSatish Balay     for (i=0; i<m; i++) {
281657b952d6SSatish Balay       nz += locrowlens[i];
281757b952d6SSatish Balay     }
281819c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
281957b952d6SSatish Balay     mycols = ibuf;
282057b952d6SSatish Balay     /* receive message of column indices*/
2821b24ad042SBarry Smith     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2822b24ad042SBarry Smith     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
282329bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
282457b952d6SSatish Balay   }
282557b952d6SSatish Balay 
282657b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2827dc231df0SBarry Smith   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2828dc231df0SBarry Smith   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2829dc231df0SBarry Smith   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2830dc231df0SBarry Smith   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2831dc231df0SBarry Smith   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
283257b952d6SSatish Balay   rowcount = 0; nzcount = 0;
283357b952d6SSatish Balay   for (i=0; i<mbs; i++) {
283457b952d6SSatish Balay     dcount  = 0;
283557b952d6SSatish Balay     odcount = 0;
283657b952d6SSatish Balay     for (j=0; j<bs; j++) {
283757b952d6SSatish Balay       kmax = locrowlens[rowcount];
283857b952d6SSatish Balay       for (k=0; k<kmax; k++) {
283957b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
284057b952d6SSatish Balay         if (!mask[tmp]) {
284157b952d6SSatish Balay           mask[tmp] = 1;
284257b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
284357b952d6SSatish Balay           else masked1[dcount++] = tmp;
284457b952d6SSatish Balay         }
284557b952d6SSatish Balay       }
284657b952d6SSatish Balay       rowcount++;
284757b952d6SSatish Balay     }
2848cee3aa6bSSatish Balay 
284957b952d6SSatish Balay     dlens[i]  = dcount;
285057b952d6SSatish Balay     odlens[i] = odcount;
2851cee3aa6bSSatish Balay 
285257b952d6SSatish Balay     /* zero out the mask elements we set */
285357b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
285457b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
285557b952d6SSatish Balay   }
2856cee3aa6bSSatish Balay 
285757b952d6SSatish Balay   /* create our matrix */
2858f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2859f69a0ea3SMatthew Knepley   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
286078ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
286178ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
286278ae41b4SKris Buschelman 
286357b952d6SSatish Balay   if (!rank) {
286419c38ff2SBarry Smith     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
286557b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
286657b952d6SSatish Balay     nz = procsnz[0];
286757b952d6SSatish Balay     vals = buf;
2868cee3aa6bSSatish Balay     mycols = ibuf;
2869cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2870e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2871cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2872537820f0SBarry Smith 
287357b952d6SSatish Balay     /* insert into matrix */
287457b952d6SSatish Balay     jj      = rstart*bs;
287557b952d6SSatish Balay     for (i=0; i<m; i++) {
2876dc231df0SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
287757b952d6SSatish Balay       mycols += locrowlens[i];
287857b952d6SSatish Balay       vals   += locrowlens[i];
287957b952d6SSatish Balay       jj++;
288057b952d6SSatish Balay     }
288157b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
288257b952d6SSatish Balay     for (i=1; i<size-1; i++) {
288357b952d6SSatish Balay       nz   = procsnz[i];
288457b952d6SSatish Balay       vals = buf;
2885e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
28867adad957SLisandro Dalcin       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
288757b952d6SSatish Balay     }
288857b952d6SSatish Balay     /* the last proc */
288957b952d6SSatish Balay     if (size != 1){
289057b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2891cee3aa6bSSatish Balay       vals = buf;
2892e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
289357b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
28947adad957SLisandro Dalcin       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
289557b952d6SSatish Balay     }
2896606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2897d64ed03dSBarry Smith   } else {
289857b952d6SSatish Balay     /* receive numeric values */
289919c38ff2SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
290057b952d6SSatish Balay 
290157b952d6SSatish Balay     /* receive message of values*/
290257b952d6SSatish Balay     vals   = buf;
2903cee3aa6bSSatish Balay     mycols = ibuf;
29047adad957SLisandro Dalcin     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2905ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
290629bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
290757b952d6SSatish Balay 
290857b952d6SSatish Balay     /* insert into matrix */
290957b952d6SSatish Balay     jj      = rstart*bs;
2910cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2911dc231df0SBarry Smith       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
291257b952d6SSatish Balay       mycols += locrowlens[i];
291357b952d6SSatish Balay       vals   += locrowlens[i];
291457b952d6SSatish Balay       jj++;
291557b952d6SSatish Balay     }
291657b952d6SSatish Balay   }
2917606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2918606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2919606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2920dc231df0SBarry Smith   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2921dc231df0SBarry Smith   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2922dc231df0SBarry Smith   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
29236d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29246d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
292578ae41b4SKris Buschelman 
292678ae41b4SKris Buschelman   *newmat = A;
29273a40ed3dSBarry Smith   PetscFunctionReturn(0);
292857b952d6SSatish Balay }
292957b952d6SSatish Balay 
29304a2ae208SSatish Balay #undef __FUNCT__
29314a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2932133cdb44SSatish Balay /*@
2933133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2934133cdb44SSatish Balay 
2935133cdb44SSatish Balay    Input Parameters:
2936133cdb44SSatish Balay .  mat  - the matrix
2937133cdb44SSatish Balay .  fact - factor
2938133cdb44SSatish Balay 
2939fee21e36SBarry Smith    Collective on Mat
2940fee21e36SBarry Smith 
29418c890885SBarry Smith    Level: advanced
29428c890885SBarry Smith 
2943133cdb44SSatish Balay   Notes:
29448c07d4e3SBarry Smith    This can also be set by the command line option: -mat_use_hash_table <fact>
2945133cdb44SSatish Balay 
2946133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2947133cdb44SSatish Balay 
2948133cdb44SSatish Balay .seealso: MatSetOption()
2949133cdb44SSatish Balay @*/
2950be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2951133cdb44SSatish Balay {
2952dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscReal);
29535bf65638SKris Buschelman 
29545bf65638SKris Buschelman   PetscFunctionBegin;
29555bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
29565bf65638SKris Buschelman   if (f) {
29575bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
29585bf65638SKris Buschelman   }
29595bf65638SKris Buschelman   PetscFunctionReturn(0);
29605bf65638SKris Buschelman }
29615bf65638SKris Buschelman 
2962be1d678aSKris Buschelman EXTERN_C_BEGIN
29635bf65638SKris Buschelman #undef __FUNCT__
29645bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2965be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
29665bf65638SKris Buschelman {
296725fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2968133cdb44SSatish Balay 
2969133cdb44SSatish Balay   PetscFunctionBegin;
2970133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2971133cdb44SSatish Balay   baij->ht_fact = fact;
2972133cdb44SSatish Balay   PetscFunctionReturn(0);
2973133cdb44SSatish Balay }
2974be1d678aSKris Buschelman EXTERN_C_END
2975f2a5309cSSatish Balay 
29764a2ae208SSatish Balay #undef __FUNCT__
29774a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2978be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2979f2a5309cSSatish Balay {
2980f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2981f2a5309cSSatish Balay   PetscFunctionBegin;
2982f2a5309cSSatish Balay   *Ad     = a->A;
2983f2a5309cSSatish Balay   *Ao     = a->B;
2984195d93cdSBarry Smith   *colmap = a->garray;
2985f2a5309cSSatish Balay   PetscFunctionReturn(0);
2986f2a5309cSSatish Balay }
298785535b8eSBarry Smith 
298885535b8eSBarry Smith /*
298985535b8eSBarry Smith     Special version for direct calls from Fortran (to eliminate two function call overheads
299085535b8eSBarry Smith */
299185535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
299285535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
299385535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
299485535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
299585535b8eSBarry Smith #endif
299685535b8eSBarry Smith 
299785535b8eSBarry Smith #undef __FUNCT__
299885535b8eSBarry Smith #define __FUNCT__ "matmpibiajsetvaluesblocked"
299985535b8eSBarry Smith /*@C
300085535b8eSBarry Smith   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
300185535b8eSBarry Smith 
300285535b8eSBarry Smith   Collective on Mat
300385535b8eSBarry Smith 
300485535b8eSBarry Smith   Input Parameters:
300585535b8eSBarry Smith + mat - the matrix
300685535b8eSBarry Smith . min - number of input rows
300785535b8eSBarry Smith . im - input rows
300885535b8eSBarry Smith . nin - number of input columns
300985535b8eSBarry Smith . in - input columns
301085535b8eSBarry Smith . v - numerical values input
301185535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES
301285535b8eSBarry Smith 
301385535b8eSBarry Smith   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
301485535b8eSBarry Smith 
301585535b8eSBarry Smith   Level: advanced
301685535b8eSBarry Smith 
301785535b8eSBarry Smith .seealso:   MatSetValuesBlocked()
301885535b8eSBarry Smith @*/
301985535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
302085535b8eSBarry Smith {
302185535b8eSBarry Smith   /* convert input arguments to C version */
302285535b8eSBarry Smith   Mat             mat = *matin;
302385535b8eSBarry Smith   PetscInt        m = *min, n = *nin;
302485535b8eSBarry Smith   InsertMode      addv = *addvin;
302585535b8eSBarry Smith 
302685535b8eSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
302785535b8eSBarry Smith   const MatScalar *value;
302885535b8eSBarry Smith   MatScalar       *barray=baij->barray;
302985535b8eSBarry Smith   PetscTruth      roworiented = baij->roworiented;
303085535b8eSBarry Smith   PetscErrorCode  ierr;
303185535b8eSBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
303285535b8eSBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
303385535b8eSBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
303485535b8eSBarry Smith 
303585535b8eSBarry Smith   PetscFunctionBegin;
303685535b8eSBarry Smith   /* tasks normally handled by MatSetValuesBlocked() */
303785535b8eSBarry Smith   if (mat->insertmode == NOT_SET_VALUES) {
303885535b8eSBarry Smith     mat->insertmode = addv;
303985535b8eSBarry Smith   }
304085535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
304185535b8eSBarry Smith   else if (mat->insertmode != addv) {
304285535b8eSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
304385535b8eSBarry Smith   }
304485535b8eSBarry Smith   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
304585535b8eSBarry Smith #endif
304685535b8eSBarry Smith   if (mat->assembled) {
304785535b8eSBarry Smith     mat->was_assembled = PETSC_TRUE;
304885535b8eSBarry Smith     mat->assembled     = PETSC_FALSE;
304985535b8eSBarry Smith   }
305085535b8eSBarry Smith   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
305185535b8eSBarry Smith 
305285535b8eSBarry Smith 
305385535b8eSBarry Smith   if(!barray) {
305485535b8eSBarry Smith     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
305585535b8eSBarry Smith     baij->barray = barray;
305685535b8eSBarry Smith   }
305785535b8eSBarry Smith 
305885535b8eSBarry Smith   if (roworiented) {
305985535b8eSBarry Smith     stepval = (n-1)*bs;
306085535b8eSBarry Smith   } else {
306185535b8eSBarry Smith     stepval = (m-1)*bs;
306285535b8eSBarry Smith   }
306385535b8eSBarry Smith   for (i=0; i<m; i++) {
306485535b8eSBarry Smith     if (im[i] < 0) continue;
306585535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
306685535b8eSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
306785535b8eSBarry Smith #endif
306885535b8eSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
306985535b8eSBarry Smith       row = im[i] - rstart;
307085535b8eSBarry Smith       for (j=0; j<n; j++) {
307185535b8eSBarry Smith         /* If NumCol = 1 then a copy is not required */
307285535b8eSBarry Smith         if ((roworiented) && (n == 1)) {
307385535b8eSBarry Smith           barray = (MatScalar*)v + i*bs2;
307485535b8eSBarry Smith         } else if((!roworiented) && (m == 1)) {
307585535b8eSBarry Smith           barray = (MatScalar*)v + j*bs2;
307685535b8eSBarry Smith         } else { /* Here a copy is required */
307785535b8eSBarry Smith           if (roworiented) {
307885535b8eSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
307985535b8eSBarry Smith           } else {
308085535b8eSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
308185535b8eSBarry Smith           }
308285535b8eSBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
308385535b8eSBarry Smith             for (jj=0; jj<bs; jj++) {
308485535b8eSBarry Smith               *barray++  = *value++;
308585535b8eSBarry Smith             }
308685535b8eSBarry Smith           }
308785535b8eSBarry Smith           barray -=bs2;
308885535b8eSBarry Smith         }
308985535b8eSBarry Smith 
309085535b8eSBarry Smith         if (in[j] >= cstart && in[j] < cend){
309185535b8eSBarry Smith           col  = in[j] - cstart;
309285535b8eSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
309385535b8eSBarry Smith         }
309485535b8eSBarry Smith         else if (in[j] < 0) continue;
309585535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
309685535b8eSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
309785535b8eSBarry Smith #endif
309885535b8eSBarry Smith         else {
309985535b8eSBarry Smith           if (mat->was_assembled) {
310085535b8eSBarry Smith             if (!baij->colmap) {
310185535b8eSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
310285535b8eSBarry Smith             }
310385535b8eSBarry Smith 
310485535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
310585535b8eSBarry Smith #if defined (PETSC_USE_CTABLE)
310685535b8eSBarry Smith             { PetscInt data;
310785535b8eSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
310885535b8eSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
310985535b8eSBarry Smith             }
311085535b8eSBarry Smith #else
311185535b8eSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
311285535b8eSBarry Smith #endif
311385535b8eSBarry Smith #endif
311485535b8eSBarry Smith #if defined (PETSC_USE_CTABLE)
311585535b8eSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
311685535b8eSBarry Smith             col  = (col - 1)/bs;
311785535b8eSBarry Smith #else
311885535b8eSBarry Smith             col = (baij->colmap[in[j]] - 1)/bs;
311985535b8eSBarry Smith #endif
312085535b8eSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
312185535b8eSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
312285535b8eSBarry Smith               col =  in[j];
312385535b8eSBarry Smith             }
312485535b8eSBarry Smith           }
312585535b8eSBarry Smith           else col = in[j];
312685535b8eSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312785535b8eSBarry Smith         }
312885535b8eSBarry Smith       }
312985535b8eSBarry Smith     } else {
313085535b8eSBarry Smith       if (!baij->donotstash) {
313185535b8eSBarry Smith         if (roworiented) {
313285535b8eSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
313385535b8eSBarry Smith         } else {
313485535b8eSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
313585535b8eSBarry Smith         }
313685535b8eSBarry Smith       }
313785535b8eSBarry Smith     }
313885535b8eSBarry Smith   }
313985535b8eSBarry Smith 
314085535b8eSBarry Smith   /* task normally handled by MatSetValuesBlocked() */
314185535b8eSBarry Smith   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
314285535b8eSBarry Smith   PetscFunctionReturn(0);
314385535b8eSBarry Smith }
3144