xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 618cc2edce4208c1fcab92c6bd2446942ca564c1)
1a30f8f8cSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
5c6db04a5SJed Brown #include <petscblaslapack.h>
6a30f8f8cSSatish Balay 
76214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
8cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
96214f412SHong Zhang #endif
10b147fbf3SStefano Zampini 
11b147fbf3SStefano Zampini /* This could be moved to matimpl.h */
12b147fbf3SStefano Zampini static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13b147fbf3SStefano Zampini {
14b147fbf3SStefano Zampini   Mat            preallocator;
15b147fbf3SStefano Zampini   PetscInt       r,rstart,rend;
16b147fbf3SStefano Zampini   PetscInt       bs,i,m,n,M,N;
17b147fbf3SStefano Zampini   PetscBool      cong = PETSC_TRUE;
18b147fbf3SStefano Zampini   PetscErrorCode ierr;
19b147fbf3SStefano Zampini 
20b147fbf3SStefano Zampini   PetscFunctionBegin;
21b147fbf3SStefano Zampini   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22b147fbf3SStefano Zampini   PetscValidLogicalCollectiveInt(B,nm,2);
23b147fbf3SStefano Zampini   for (i = 0; i < nm; i++) {
24b147fbf3SStefano Zampini     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25b147fbf3SStefano Zampini     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26b147fbf3SStefano Zampini     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27b147fbf3SStefano Zampini   }
28b147fbf3SStefano Zampini   PetscValidLogicalCollectiveBool(B,fill,5);
29b147fbf3SStefano Zampini   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30b147fbf3SStefano Zampini   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31b147fbf3SStefano Zampini   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32b147fbf3SStefano Zampini   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33b147fbf3SStefano Zampini   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34b147fbf3SStefano Zampini   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35b147fbf3SStefano Zampini   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36b147fbf3SStefano Zampini   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37b147fbf3SStefano Zampini   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38b147fbf3SStefano Zampini   for (r = rstart; r < rend; ++r) {
39b147fbf3SStefano Zampini     PetscInt          ncols;
40b147fbf3SStefano Zampini     const PetscInt    *row;
41b147fbf3SStefano Zampini     const PetscScalar *vals;
42b147fbf3SStefano Zampini 
43b147fbf3SStefano Zampini     for (i = 0; i < nm; i++) {
44b147fbf3SStefano Zampini       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45b147fbf3SStefano Zampini       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46b147fbf3SStefano Zampini       if (symm && symm[i]) {
47b147fbf3SStefano Zampini         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48b147fbf3SStefano Zampini       }
49b147fbf3SStefano Zampini       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50b147fbf3SStefano Zampini     }
51b147fbf3SStefano Zampini   }
52b147fbf3SStefano Zampini   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53b147fbf3SStefano Zampini   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54b147fbf3SStefano Zampini   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55b147fbf3SStefano Zampini   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56b147fbf3SStefano Zampini   PetscFunctionReturn(0);
57b147fbf3SStefano Zampini }
58b147fbf3SStefano Zampini 
5928d58a37SPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60b147fbf3SStefano Zampini {
61b147fbf3SStefano Zampini   Mat            B;
62b147fbf3SStefano Zampini   PetscErrorCode ierr;
63b147fbf3SStefano Zampini   PetscInt       r;
64b147fbf3SStefano Zampini 
65b147fbf3SStefano Zampini   PetscFunctionBegin;
66b147fbf3SStefano Zampini   if (reuse != MAT_REUSE_MATRIX) {
6728d58a37SPierre Jolivet     PetscBool symm = PETSC_TRUE,isdense;
68b147fbf3SStefano Zampini     PetscInt  bs;
69b147fbf3SStefano Zampini 
70b147fbf3SStefano Zampini     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71b147fbf3SStefano Zampini     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72b147fbf3SStefano Zampini     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73b147fbf3SStefano Zampini     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74b147fbf3SStefano Zampini     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75b147fbf3SStefano Zampini     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76b147fbf3SStefano Zampini     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
7728d58a37SPierre Jolivet     ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
7828d58a37SPierre Jolivet     if (!isdense) {
79b147fbf3SStefano Zampini       ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
80b147fbf3SStefano Zampini       ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
81b147fbf3SStefano Zampini       ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
8228d58a37SPierre Jolivet     } else {
8328d58a37SPierre Jolivet       ierr = MatSetUp(B);CHKERRQ(ierr);
8428d58a37SPierre Jolivet     }
8528d58a37SPierre Jolivet   } else {
8628d58a37SPierre Jolivet     B    = *newmat;
8728d58a37SPierre Jolivet     ierr = MatZeroEntries(B);CHKERRQ(ierr);
8828d58a37SPierre Jolivet   }
89b147fbf3SStefano Zampini 
90b147fbf3SStefano Zampini   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
91b147fbf3SStefano Zampini   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
92b147fbf3SStefano Zampini     PetscInt          ncols;
93b147fbf3SStefano Zampini     const PetscInt    *row;
94b147fbf3SStefano Zampini     const PetscScalar *vals;
95b147fbf3SStefano Zampini 
96b147fbf3SStefano Zampini     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
97b147fbf3SStefano Zampini     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
98eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
99eb1ec7c1SStefano Zampini     if (A->hermitian) {
100eb1ec7c1SStefano Zampini       PetscInt i;
101eb1ec7c1SStefano Zampini       for (i = 0; i < ncols; i++) {
102eb1ec7c1SStefano Zampini         ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
103eb1ec7c1SStefano Zampini       }
104eb1ec7c1SStefano Zampini     } else {
105b147fbf3SStefano Zampini       ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
106eb1ec7c1SStefano Zampini     }
107eb1ec7c1SStefano Zampini #else
108eb1ec7c1SStefano Zampini     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
109eb1ec7c1SStefano Zampini #endif
110b147fbf3SStefano Zampini     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
111b147fbf3SStefano Zampini   }
112b147fbf3SStefano Zampini   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
113b147fbf3SStefano Zampini   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
114b147fbf3SStefano Zampini   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
115b147fbf3SStefano Zampini 
116b147fbf3SStefano Zampini   if (reuse == MAT_INPLACE_MATRIX) {
117b147fbf3SStefano Zampini     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
118b147fbf3SStefano Zampini   } else {
119b147fbf3SStefano Zampini     *newmat = B;
120b147fbf3SStefano Zampini   }
121b147fbf3SStefano Zampini   PetscFunctionReturn(0);
122b147fbf3SStefano Zampini }
123b147fbf3SStefano Zampini 
1247087cfbeSBarry Smith PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
125a30f8f8cSSatish Balay {
126f3566a2aSHong Zhang   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
127dfbe8321SBarry Smith   PetscErrorCode ierr;
128a30f8f8cSSatish Balay 
129a30f8f8cSSatish Balay   PetscFunctionBegin;
130a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
131a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
132a30f8f8cSSatish Balay   PetscFunctionReturn(0);
133a30f8f8cSSatish Balay }
134a30f8f8cSSatish Balay 
1357087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136a30f8f8cSSatish Balay {
137f3566a2aSHong Zhang   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138dfbe8321SBarry Smith   PetscErrorCode ierr;
139a30f8f8cSSatish Balay 
140a30f8f8cSSatish Balay   PetscFunctionBegin;
141a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
142a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
143a30f8f8cSSatish Balay   PetscFunctionReturn(0);
144a30f8f8cSSatish Balay }
145a30f8f8cSSatish Balay 
146d40312a9SBarry Smith #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
147a30f8f8cSSatish Balay   { \
148a30f8f8cSSatish Balay     brow = row/bs;  \
149a30f8f8cSSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150a30f8f8cSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
151a30f8f8cSSatish Balay     bcol = col/bs; \
152a30f8f8cSSatish Balay     ridx = row % bs; cidx = col % bs; \
153a30f8f8cSSatish Balay     low  = 0; high = nrow; \
154a30f8f8cSSatish Balay     while (high-low > 3) { \
155a30f8f8cSSatish Balay       t = (low+high)/2; \
156a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
157a30f8f8cSSatish Balay       else              low  = t; \
158a30f8f8cSSatish Balay     } \
159a30f8f8cSSatish Balay     for (_i=low; _i<high; _i++) { \
160a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
161a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
162a30f8f8cSSatish Balay         bap = ap + bs2*_i + bs*cidx + ridx; \
163a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value;  \
164a30f8f8cSSatish Balay         else                    *bap  = value;  \
165a30f8f8cSSatish Balay         goto a_noinsert; \
166a30f8f8cSSatish Balay       } \
167a30f8f8cSSatish Balay     } \
168a30f8f8cSSatish Balay     if (a->nonew == 1) goto a_noinsert; \
169d40312a9SBarry Smith     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
170fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
171a30f8f8cSSatish Balay     N = nrow++ - 1;  \
172a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
173580bdb30SBarry Smith     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
174580bdb30SBarry Smith     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
175580bdb30SBarry Smith     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
176a30f8f8cSSatish Balay     rp[_i]                      = bcol;  \
177a30f8f8cSSatish Balay     ap[bs2*_i + bs*cidx + ridx] = value;  \
178e56f5c9eSBarry Smith     A->nonzerostate++;\
179a30f8f8cSSatish Balay a_noinsert:; \
180a30f8f8cSSatish Balay     ailen[brow] = nrow; \
181a30f8f8cSSatish Balay   }
182e5e170daSBarry Smith 
183d40312a9SBarry Smith #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
184a30f8f8cSSatish Balay   { \
185a30f8f8cSSatish Balay     brow = row/bs;  \
186a30f8f8cSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
187a30f8f8cSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
188a30f8f8cSSatish Balay     bcol = col/bs; \
189a30f8f8cSSatish Balay     ridx = row % bs; cidx = col % bs; \
190a30f8f8cSSatish Balay     low  = 0; high = nrow; \
191a30f8f8cSSatish Balay     while (high-low > 3) { \
192a30f8f8cSSatish Balay       t = (low+high)/2; \
193a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
194a30f8f8cSSatish Balay       else              low  = t; \
195a30f8f8cSSatish Balay     } \
196a30f8f8cSSatish Balay     for (_i=low; _i<high; _i++) { \
197a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
198a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
199a30f8f8cSSatish Balay         bap = ap + bs2*_i + bs*cidx + ridx; \
200a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value;  \
201a30f8f8cSSatish Balay         else                    *bap  = value;  \
202a30f8f8cSSatish Balay         goto b_noinsert; \
203a30f8f8cSSatish Balay       } \
204a30f8f8cSSatish Balay     } \
205a30f8f8cSSatish Balay     if (b->nonew == 1) goto b_noinsert; \
206d40312a9SBarry Smith     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
207fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
208a30f8f8cSSatish Balay     N = nrow++ - 1;  \
209a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
210580bdb30SBarry Smith     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
211580bdb30SBarry Smith     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
212580bdb30SBarry Smith     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
213a30f8f8cSSatish Balay     rp[_i]                      = bcol;  \
214a30f8f8cSSatish Balay     ap[bs2*_i + bs*cidx + ridx] = value;  \
215e56f5c9eSBarry Smith     B->nonzerostate++;\
216a30f8f8cSSatish Balay b_noinsert:; \
217a30f8f8cSSatish Balay     bilen[brow] = nrow; \
218a30f8f8cSSatish Balay   }
219a30f8f8cSSatish Balay 
220a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
221476417e5SBarry Smith    Any a(i,j) with i>j input by user is ingored or generates an error
222a30f8f8cSSatish Balay */
223dd6ea824SBarry Smith PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
224a30f8f8cSSatish Balay {
225a30f8f8cSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
226a30f8f8cSSatish Balay   MatScalar      value;
227ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
228dfbe8321SBarry Smith   PetscErrorCode ierr;
2291302d50aSBarry Smith   PetscInt       i,j,row,col;
230d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
231d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
232d0f46423SBarry Smith   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
233a30f8f8cSSatish Balay 
234a30f8f8cSSatish Balay   /* Some Variables required in the macro */
235a30f8f8cSSatish Balay   Mat          A     = baij->A;
236a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
2371302d50aSBarry Smith   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
238a30f8f8cSSatish Balay   MatScalar    *aa   =a->a;
239a30f8f8cSSatish Balay 
240a30f8f8cSSatish Balay   Mat         B     = baij->B;
241a30f8f8cSSatish Balay   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
2421302d50aSBarry Smith   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
243a30f8f8cSSatish Balay   MatScalar   *ba   =b->a;
244a30f8f8cSSatish Balay 
2451302d50aSBarry Smith   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
2461302d50aSBarry Smith   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
247a30f8f8cSSatish Balay   MatScalar *ap,*bap;
248a30f8f8cSSatish Balay 
249a30f8f8cSSatish Balay   /* for stash */
2500298fd71SBarry Smith   PetscInt  n_loc, *in_loc = NULL;
2510298fd71SBarry Smith   MatScalar *v_loc = NULL;
252a30f8f8cSSatish Balay 
253a30f8f8cSSatish Balay   PetscFunctionBegin;
254a30f8f8cSSatish Balay   if (!baij->donotstash) {
25559ffdab8SBarry Smith     if (n > baij->n_loc) {
25659ffdab8SBarry Smith       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
25759ffdab8SBarry Smith       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
258785e854fSJed Brown       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
259785e854fSJed Brown       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
26026fbe8dcSKarl Rupp 
26159ffdab8SBarry Smith       baij->n_loc = n;
26259ffdab8SBarry Smith     }
26359ffdab8SBarry Smith     in_loc = baij->in_loc;
26459ffdab8SBarry Smith     v_loc  = baij->v_loc;
265a30f8f8cSSatish Balay   }
266a30f8f8cSSatish Balay 
267a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
268a30f8f8cSSatish Balay     if (im[i] < 0) continue;
2692515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
270e32f2f54SBarry Smith     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
271a30f8f8cSSatish Balay #endif
272a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
273a30f8f8cSSatish Balay       row = im[i] - rstart_orig;              /* local row index */
274a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
27501b2bd88SHong Zhang         if (im[i]/bs > in[j]/bs) {
27601b2bd88SHong Zhang           if (a->ignore_ltriangular) {
27701b2bd88SHong Zhang             continue;    /* ignore lower triangular blocks */
27826fbe8dcSKarl Rupp           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
27901b2bd88SHong Zhang         }
280a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
281a30f8f8cSSatish Balay           col  = in[j] - cstart_orig;         /* local col index */
282a30f8f8cSSatish Balay           brow = row/bs; bcol = col/bs;
283a30f8f8cSSatish Balay           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
284db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
285db4deed7SKarl Rupp           else             value = v[i+j*m];
286d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
287a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
288a30f8f8cSSatish Balay         } else if (in[j] < 0) continue;
2892515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
290cb9801acSJed Brown         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291a30f8f8cSSatish Balay #endif
292a30f8f8cSSatish Balay         else {  /* off-diag entry (B) */
293a30f8f8cSSatish Balay           if (mat->was_assembled) {
294a30f8f8cSSatish Balay             if (!baij->colmap) {
295ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
296a30f8f8cSSatish Balay             }
297a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
298a30f8f8cSSatish Balay             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
29971730473SSatish Balay             col  = col - 1;
300a30f8f8cSSatish Balay #else
30171730473SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
302a30f8f8cSSatish Balay #endif
303a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
304ab9863d7SBarry Smith               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
305a30f8f8cSSatish Balay               col  =  in[j];
306a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
307a30f8f8cSSatish Balay               B    = baij->B;
308a30f8f8cSSatish Balay               b    = (Mat_SeqBAIJ*)(B)->data;
309a30f8f8cSSatish Balay               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
310a30f8f8cSSatish Balay               ba   = b->a;
31171730473SSatish Balay             } else col += in[j]%bs;
312a30f8f8cSSatish Balay           } else col = in[j];
313db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
314db4deed7SKarl Rupp           else             value = v[i+j*m];
315d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
316a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
317a30f8f8cSSatish Balay         }
318a30f8f8cSSatish Balay       }
319a30f8f8cSSatish Balay     } else {  /* off processor entry */
3204cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
321a30f8f8cSSatish Balay       if (!baij->donotstash) {
3225080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
323a30f8f8cSSatish Balay         n_loc          = 0;
324a30f8f8cSSatish Balay         for (j=0; j<n; j++) {
325f65c83cfSHong Zhang           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
326a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
327a30f8f8cSSatish Balay           if (roworiented) {
328a30f8f8cSSatish Balay             v_loc[n_loc] = v[i*n+j];
329a30f8f8cSSatish Balay           } else {
330a30f8f8cSSatish Balay             v_loc[n_loc] = v[j*m+i];
331a30f8f8cSSatish Balay           }
332a30f8f8cSSatish Balay           n_loc++;
333a30f8f8cSSatish Balay         }
334b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
335a30f8f8cSSatish Balay       }
336a30f8f8cSSatish Balay     }
337a30f8f8cSSatish Balay   }
338a30f8f8cSSatish Balay   PetscFunctionReturn(0);
339a30f8f8cSSatish Balay }
340a30f8f8cSSatish Balay 
34136bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
34236bd2089SBarry Smith {
34336bd2089SBarry Smith   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
34436bd2089SBarry Smith   PetscErrorCode    ierr;
34536bd2089SBarry Smith   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
34636bd2089SBarry Smith   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
34736bd2089SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
34836bd2089SBarry Smith   PetscBool         roworiented=a->roworiented;
34936bd2089SBarry Smith   const PetscScalar *value     = v;
35036bd2089SBarry Smith   MatScalar         *ap,*aa = a->a,*bap;
35136bd2089SBarry Smith 
35236bd2089SBarry Smith   PetscFunctionBegin;
35336bd2089SBarry Smith   if (col < row) {
35436bd2089SBarry Smith     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
35536bd2089SBarry Smith     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
35636bd2089SBarry Smith   }
35736bd2089SBarry Smith   rp   = aj + ai[row];
35836bd2089SBarry Smith   ap   = aa + bs2*ai[row];
35936bd2089SBarry Smith   rmax = imax[row];
36036bd2089SBarry Smith   nrow = ailen[row];
36136bd2089SBarry Smith   value = v;
36236bd2089SBarry Smith   low   = 0;
36336bd2089SBarry Smith   high  = nrow;
36436bd2089SBarry Smith 
36536bd2089SBarry Smith   while (high-low > 7) {
36636bd2089SBarry Smith     t = (low+high)/2;
36736bd2089SBarry Smith     if (rp[t] > col) high = t;
36836bd2089SBarry Smith     else             low  = t;
36936bd2089SBarry Smith   }
37036bd2089SBarry Smith   for (i=low; i<high; i++) {
37136bd2089SBarry Smith     if (rp[i] > col) break;
37236bd2089SBarry Smith     if (rp[i] == col) {
37336bd2089SBarry Smith       bap = ap +  bs2*i;
37436bd2089SBarry Smith       if (roworiented) {
37536bd2089SBarry Smith         if (is == ADD_VALUES) {
37636bd2089SBarry Smith           for (ii=0; ii<bs; ii++) {
37736bd2089SBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
37836bd2089SBarry Smith               bap[jj] += *value++;
37936bd2089SBarry Smith             }
38036bd2089SBarry Smith           }
38136bd2089SBarry Smith         } else {
38236bd2089SBarry Smith           for (ii=0; ii<bs; ii++) {
38336bd2089SBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
38436bd2089SBarry Smith               bap[jj] = *value++;
38536bd2089SBarry Smith             }
38636bd2089SBarry Smith           }
38736bd2089SBarry Smith         }
38836bd2089SBarry Smith       } else {
38936bd2089SBarry Smith         if (is == ADD_VALUES) {
39036bd2089SBarry Smith           for (ii=0; ii<bs; ii++) {
39136bd2089SBarry Smith             for (jj=0; jj<bs; jj++) {
39236bd2089SBarry Smith               *bap++ += *value++;
39336bd2089SBarry Smith             }
39436bd2089SBarry Smith           }
39536bd2089SBarry Smith         } else {
39636bd2089SBarry Smith           for (ii=0; ii<bs; ii++) {
39736bd2089SBarry Smith             for (jj=0; jj<bs; jj++) {
39836bd2089SBarry Smith               *bap++  = *value++;
39936bd2089SBarry Smith             }
40036bd2089SBarry Smith           }
40136bd2089SBarry Smith         }
40236bd2089SBarry Smith       }
40336bd2089SBarry Smith       goto noinsert2;
40436bd2089SBarry Smith     }
40536bd2089SBarry Smith   }
40636bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
40736bd2089SBarry Smith   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
40836bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
40936bd2089SBarry Smith   N = nrow++ - 1; high++;
41036bd2089SBarry Smith   /* shift up all the later entries in this row */
411580bdb30SBarry Smith   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
412580bdb30SBarry Smith   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
41336bd2089SBarry Smith   rp[i] = col;
41436bd2089SBarry Smith   bap   = ap +  bs2*i;
41536bd2089SBarry Smith   if (roworiented) {
41636bd2089SBarry Smith     for (ii=0; ii<bs; ii++) {
41736bd2089SBarry Smith       for (jj=ii; jj<bs2; jj+=bs) {
41836bd2089SBarry Smith         bap[jj] = *value++;
41936bd2089SBarry Smith       }
42036bd2089SBarry Smith     }
42136bd2089SBarry Smith   } else {
42236bd2089SBarry Smith     for (ii=0; ii<bs; ii++) {
42336bd2089SBarry Smith       for (jj=0; jj<bs; jj++) {
42436bd2089SBarry Smith         *bap++ = *value++;
42536bd2089SBarry Smith       }
42636bd2089SBarry Smith     }
42736bd2089SBarry Smith   }
42836bd2089SBarry Smith   noinsert2:;
42936bd2089SBarry Smith   ailen[row] = nrow;
43036bd2089SBarry Smith   PetscFunctionReturn(0);
43136bd2089SBarry Smith }
43236bd2089SBarry Smith 
43336bd2089SBarry Smith /*
43436bd2089SBarry Smith    This routine is exactly duplicated in mpibaij.c
43536bd2089SBarry Smith */
43636bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
43736bd2089SBarry Smith {
43836bd2089SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
43936bd2089SBarry Smith   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
44036bd2089SBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
44136bd2089SBarry Smith   PetscErrorCode    ierr;
44236bd2089SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
44336bd2089SBarry Smith   PetscBool         roworiented=a->roworiented;
44436bd2089SBarry Smith   const PetscScalar *value     = v;
44536bd2089SBarry Smith   MatScalar         *ap,*aa = a->a,*bap;
44636bd2089SBarry Smith 
44736bd2089SBarry Smith   PetscFunctionBegin;
44836bd2089SBarry Smith   rp   = aj + ai[row];
44936bd2089SBarry Smith   ap   = aa + bs2*ai[row];
45036bd2089SBarry Smith   rmax = imax[row];
45136bd2089SBarry Smith   nrow = ailen[row];
45236bd2089SBarry Smith   low  = 0;
45336bd2089SBarry Smith   high = nrow;
45436bd2089SBarry Smith   value = v;
45536bd2089SBarry Smith   while (high-low > 7) {
45636bd2089SBarry Smith     t = (low+high)/2;
45736bd2089SBarry Smith     if (rp[t] > col) high = t;
45836bd2089SBarry Smith     else             low  = t;
45936bd2089SBarry Smith   }
46036bd2089SBarry Smith   for (i=low; i<high; i++) {
46136bd2089SBarry Smith     if (rp[i] > col) break;
46236bd2089SBarry Smith     if (rp[i] == col) {
46336bd2089SBarry Smith       bap = ap +  bs2*i;
46436bd2089SBarry Smith       if (roworiented) {
46536bd2089SBarry Smith         if (is == ADD_VALUES) {
46636bd2089SBarry Smith           for (ii=0; ii<bs; ii++) {
46736bd2089SBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
46836bd2089SBarry Smith               bap[jj] += *value++;
46936bd2089SBarry Smith             }
47036bd2089SBarry Smith           }
47136bd2089SBarry Smith         } else {
47236bd2089SBarry Smith           for (ii=0; ii<bs; ii++) {
47336bd2089SBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
47436bd2089SBarry Smith               bap[jj] = *value++;
47536bd2089SBarry Smith             }
47636bd2089SBarry Smith           }
47736bd2089SBarry Smith         }
47836bd2089SBarry Smith       } else {
47936bd2089SBarry Smith         if (is == ADD_VALUES) {
48036bd2089SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
48136bd2089SBarry Smith             for (jj=0; jj<bs; jj++) {
48236bd2089SBarry Smith               bap[jj] += value[jj];
48336bd2089SBarry Smith             }
48436bd2089SBarry Smith             bap += bs;
48536bd2089SBarry Smith           }
48636bd2089SBarry Smith         } else {
48736bd2089SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
48836bd2089SBarry Smith             for (jj=0; jj<bs; jj++) {
48936bd2089SBarry Smith               bap[jj]  = value[jj];
49036bd2089SBarry Smith             }
49136bd2089SBarry Smith             bap += bs;
49236bd2089SBarry Smith           }
49336bd2089SBarry Smith         }
49436bd2089SBarry Smith       }
49536bd2089SBarry Smith       goto noinsert2;
49636bd2089SBarry Smith     }
49736bd2089SBarry Smith   }
49836bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
49936bd2089SBarry Smith   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
50036bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
50136bd2089SBarry Smith   N = nrow++ - 1; high++;
50236bd2089SBarry Smith   /* shift up all the later entries in this row */
503580bdb30SBarry Smith   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
504580bdb30SBarry Smith   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
50536bd2089SBarry Smith   rp[i] = col;
50636bd2089SBarry Smith   bap   = ap +  bs2*i;
50736bd2089SBarry Smith   if (roworiented) {
50836bd2089SBarry Smith     for (ii=0; ii<bs; ii++) {
50936bd2089SBarry Smith       for (jj=ii; jj<bs2; jj+=bs) {
51036bd2089SBarry Smith         bap[jj] = *value++;
51136bd2089SBarry Smith       }
51236bd2089SBarry Smith     }
51336bd2089SBarry Smith   } else {
51436bd2089SBarry Smith     for (ii=0; ii<bs; ii++) {
51536bd2089SBarry Smith       for (jj=0; jj<bs; jj++) {
51636bd2089SBarry Smith         *bap++ = *value++;
51736bd2089SBarry Smith       }
51836bd2089SBarry Smith     }
51936bd2089SBarry Smith   }
52036bd2089SBarry Smith   noinsert2:;
52136bd2089SBarry Smith   ailen[row] = nrow;
52236bd2089SBarry Smith   PetscFunctionReturn(0);
52336bd2089SBarry Smith }
52436bd2089SBarry Smith 
52536bd2089SBarry Smith /*
52636bd2089SBarry Smith     This routine could be optimized by removing the need for the block copy below and passing stride information
52736bd2089SBarry Smith   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
52836bd2089SBarry Smith */
529dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
530a30f8f8cSSatish Balay {
5310880e062SHong Zhang   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
532f15d580aSBarry Smith   const MatScalar *value;
533f15d580aSBarry Smith   MatScalar       *barray     =baij->barray;
534ace3abfcSBarry Smith   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
535dfbe8321SBarry Smith   PetscErrorCode  ierr;
536899cda47SBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
537476417e5SBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
538476417e5SBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
5390880e062SHong Zhang 
540a30f8f8cSSatish Balay   PetscFunctionBegin;
5410880e062SHong Zhang   if (!barray) {
542785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
5430880e062SHong Zhang     baij->barray = barray;
5440880e062SHong Zhang   }
5450880e062SHong Zhang 
5460880e062SHong Zhang   if (roworiented) {
5470880e062SHong Zhang     stepval = (n-1)*bs;
5480880e062SHong Zhang   } else {
5490880e062SHong Zhang     stepval = (m-1)*bs;
5500880e062SHong Zhang   }
5510880e062SHong Zhang   for (i=0; i<m; i++) {
5520880e062SHong Zhang     if (im[i] < 0) continue;
5532515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
554bb003d0fSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
5550880e062SHong Zhang #endif
5560880e062SHong Zhang     if (im[i] >= rstart && im[i] < rend) {
5570880e062SHong Zhang       row = im[i] - rstart;
5580880e062SHong Zhang       for (j=0; j<n; j++) {
559f3f98c53SJed Brown         if (im[i] > in[j]) {
560f3f98c53SJed Brown           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
561e32f2f54SBarry Smith           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
562f3f98c53SJed Brown         }
5630880e062SHong Zhang         /* If NumCol = 1 then a copy is not required */
5640880e062SHong Zhang         if ((roworiented) && (n == 1)) {
565f15d580aSBarry Smith           barray = (MatScalar*) v + i*bs2;
5660880e062SHong Zhang         } else if ((!roworiented) && (m == 1)) {
567f15d580aSBarry Smith           barray = (MatScalar*) v + j*bs2;
5680880e062SHong Zhang         } else { /* Here a copy is required */
5690880e062SHong Zhang           if (roworiented) {
5700880e062SHong Zhang             value = v + i*(stepval+bs)*bs + j*bs;
5710880e062SHong Zhang           } else {
5720880e062SHong Zhang             value = v + j*(stepval+bs)*bs + i*bs;
5730880e062SHong Zhang           }
5740880e062SHong Zhang           for (ii=0; ii<bs; ii++,value+=stepval) {
5750880e062SHong Zhang             for (jj=0; jj<bs; jj++) {
5760880e062SHong Zhang               *barray++ = *value++;
5770880e062SHong Zhang             }
5780880e062SHong Zhang           }
5790880e062SHong Zhang           barray -=bs2;
5800880e062SHong Zhang         }
5810880e062SHong Zhang 
5820880e062SHong Zhang         if (in[j] >= cstart && in[j] < cend) {
5830880e062SHong Zhang           col  = in[j] - cstart;
58436bd2089SBarry Smith           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
58526fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
5862515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
587bb003d0fSBarry Smith         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
5880880e062SHong Zhang #endif
5890880e062SHong Zhang         else {
5900880e062SHong Zhang           if (mat->was_assembled) {
5910880e062SHong Zhang             if (!baij->colmap) {
592ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
5930880e062SHong Zhang             }
5940880e062SHong Zhang 
5952515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
5960880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
5971302d50aSBarry Smith             { PetscInt data;
5980880e062SHong Zhang               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
599e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
6000880e062SHong Zhang             }
6010880e062SHong Zhang #else
602e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
6030880e062SHong Zhang #endif
6040880e062SHong Zhang #endif
6050880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
6060880e062SHong Zhang             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6070880e062SHong Zhang             col  = (col - 1)/bs;
6080880e062SHong Zhang #else
6090880e062SHong Zhang             col = (baij->colmap[in[j]] - 1)/bs;
6100880e062SHong Zhang #endif
6110880e062SHong Zhang             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
612ab9863d7SBarry Smith               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
6130880e062SHong Zhang               col  = in[j];
6140880e062SHong Zhang             }
61526fbe8dcSKarl Rupp           } else col = in[j];
61636bd2089SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
6170880e062SHong Zhang         }
6180880e062SHong Zhang       }
6190880e062SHong Zhang     } else {
620bb003d0fSBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
6210880e062SHong Zhang       if (!baij->donotstash) {
6220880e062SHong Zhang         if (roworiented) {
6230880e062SHong Zhang           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6240880e062SHong Zhang         } else {
6250880e062SHong Zhang           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6260880e062SHong Zhang         }
6270880e062SHong Zhang       }
6280880e062SHong Zhang     }
6290880e062SHong Zhang   }
6300880e062SHong Zhang   PetscFunctionReturn(0);
631a30f8f8cSSatish Balay }
632a30f8f8cSSatish Balay 
6331302d50aSBarry Smith PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
634a30f8f8cSSatish Balay {
635f3566a2aSHong Zhang   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
6366849ba73SBarry Smith   PetscErrorCode ierr;
637d0f46423SBarry Smith   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
638d0f46423SBarry Smith   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
639a30f8f8cSSatish Balay 
640a30f8f8cSSatish Balay   PetscFunctionBegin;
641a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
642e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
643e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
644a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
645a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
646a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
647e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
648e32f2f54SBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
649a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
650a30f8f8cSSatish Balay           col  = idxn[j] - bscstart;
651c8407628SSatish Balay           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
652a30f8f8cSSatish Balay         } else {
653a30f8f8cSSatish Balay           if (!baij->colmap) {
654ab9863d7SBarry Smith             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
655a30f8f8cSSatish Balay           }
656a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
657a30f8f8cSSatish Balay           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
658a30f8f8cSSatish Balay           data--;
659a30f8f8cSSatish Balay #else
660a30f8f8cSSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
661a30f8f8cSSatish Balay #endif
662a30f8f8cSSatish Balay           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
663a30f8f8cSSatish Balay           else {
664a30f8f8cSSatish Balay             col  = data + idxn[j]%bs;
665e249d750SSatish Balay             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
666a30f8f8cSSatish Balay           }
667a30f8f8cSSatish Balay         }
668a30f8f8cSSatish Balay       }
669f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
670a30f8f8cSSatish Balay   }
671a30f8f8cSSatish Balay   PetscFunctionReturn(0);
672a30f8f8cSSatish Balay }
673a30f8f8cSSatish Balay 
674dfbe8321SBarry Smith PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
675a30f8f8cSSatish Balay {
676a30f8f8cSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
677dfbe8321SBarry Smith   PetscErrorCode ierr;
678a30f8f8cSSatish Balay   PetscReal      sum[2],*lnorm2;
679a30f8f8cSSatish Balay 
680a30f8f8cSSatish Balay   PetscFunctionBegin;
681a30f8f8cSSatish Balay   if (baij->size == 1) {
682a30f8f8cSSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
683a30f8f8cSSatish Balay   } else {
684a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
685785e854fSJed Brown       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
686a30f8f8cSSatish Balay       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
687a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
688a30f8f8cSSatish Balay       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
689a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
690b2566f29SBarry Smith       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
6918f1a2a5eSBarry Smith       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
692a30f8f8cSSatish Balay       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
6930b8dc8d2SHong Zhang     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
6940b8dc8d2SHong Zhang       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
6950b8dc8d2SHong Zhang       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
6960b8dc8d2SHong Zhang       PetscReal    *rsum,*rsum2,vabs;
697899cda47SBarry Smith       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
698d0f46423SBarry Smith       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
6990b8dc8d2SHong Zhang       MatScalar    *v;
7000b8dc8d2SHong Zhang 
701dcca6d9dSJed Brown       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
702580bdb30SBarry Smith       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
7030b8dc8d2SHong Zhang       /* Amat */
7040b8dc8d2SHong Zhang       v = amat->a; jj = amat->j;
7050b8dc8d2SHong Zhang       for (brow=0; brow<mbs; brow++) {
7060b8dc8d2SHong Zhang         grow = bs*(rstart + brow);
7070b8dc8d2SHong Zhang         nz   = amat->i[brow+1] - amat->i[brow];
7080b8dc8d2SHong Zhang         for (bcol=0; bcol<nz; bcol++) {
7090b8dc8d2SHong Zhang           gcol = bs*(rstart + *jj); jj++;
7100b8dc8d2SHong Zhang           for (col=0; col<bs; col++) {
7110b8dc8d2SHong Zhang             for (row=0; row<bs; row++) {
7120b8dc8d2SHong Zhang               vabs            = PetscAbsScalar(*v); v++;
7130b8dc8d2SHong Zhang               rsum[gcol+col] += vabs;
7140b8dc8d2SHong Zhang               /* non-diagonal block */
7150b8dc8d2SHong Zhang               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
7160b8dc8d2SHong Zhang             }
7170b8dc8d2SHong Zhang           }
7180b8dc8d2SHong Zhang         }
71951f70360SJed Brown         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
7200b8dc8d2SHong Zhang       }
7210b8dc8d2SHong Zhang       /* Bmat */
7220b8dc8d2SHong Zhang       v = bmat->a; jj = bmat->j;
7230b8dc8d2SHong Zhang       for (brow=0; brow<mbs; brow++) {
7240b8dc8d2SHong Zhang         grow = bs*(rstart + brow);
7250b8dc8d2SHong Zhang         nz = bmat->i[brow+1] - bmat->i[brow];
7260b8dc8d2SHong Zhang         for (bcol=0; bcol<nz; bcol++) {
7270b8dc8d2SHong Zhang           gcol = bs*garray[*jj]; jj++;
7280b8dc8d2SHong Zhang           for (col=0; col<bs; col++) {
7290b8dc8d2SHong Zhang             for (row=0; row<bs; row++) {
7300b8dc8d2SHong Zhang               vabs            = PetscAbsScalar(*v); v++;
7310b8dc8d2SHong Zhang               rsum[gcol+col] += vabs;
7320b8dc8d2SHong Zhang               rsum[grow+row] += vabs;
7330b8dc8d2SHong Zhang             }
7340b8dc8d2SHong Zhang           }
7350b8dc8d2SHong Zhang         }
73651f70360SJed Brown         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
7370b8dc8d2SHong Zhang       }
738b2566f29SBarry Smith       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
7390b8dc8d2SHong Zhang       *norm = 0.0;
740d0f46423SBarry Smith       for (col=0; col<mat->cmap->N; col++) {
7410b8dc8d2SHong Zhang         if (rsum2[col] > *norm) *norm = rsum2[col];
7420b8dc8d2SHong Zhang       }
74374ed9c26SBarry Smith       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
744f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
745a30f8f8cSSatish Balay   }
746a30f8f8cSSatish Balay   PetscFunctionReturn(0);
747a30f8f8cSSatish Balay }
748a30f8f8cSSatish Balay 
749dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
750a30f8f8cSSatish Balay {
751a30f8f8cSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
752dfbe8321SBarry Smith   PetscErrorCode ierr;
7531302d50aSBarry Smith   PetscInt       nstash,reallocs;
754a30f8f8cSSatish Balay 
755a30f8f8cSSatish Balay   PetscFunctionBegin;
75626fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
757a30f8f8cSSatish Balay 
758d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
7591e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
760a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
7611e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
762a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
7631e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
764a30f8f8cSSatish Balay   PetscFunctionReturn(0);
765a30f8f8cSSatish Balay }
766a30f8f8cSSatish Balay 
767dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
768a30f8f8cSSatish Balay {
769a30f8f8cSSatish Balay   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
770a30f8f8cSSatish Balay   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
7716849ba73SBarry Smith   PetscErrorCode ierr;
77213f74950SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
773e44c0bd4SBarry Smith   PetscInt       *row,*col;
774ace3abfcSBarry Smith   PetscBool      other_disassembled;
77513f74950SBarry Smith   PetscMPIInt    n;
776ace3abfcSBarry Smith   PetscBool      r1,r2,r3;
777a30f8f8cSSatish Balay   MatScalar      *val;
778a30f8f8cSSatish Balay 
77991c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
780a30f8f8cSSatish Balay   PetscFunctionBegin;
7814cb17eb5SBarry Smith   if (!baij->donotstash &&  !mat->nooffprocentries) {
782a30f8f8cSSatish Balay     while (1) {
783a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
784a30f8f8cSSatish Balay       if (!flg) break;
785a30f8f8cSSatish Balay 
786a30f8f8cSSatish Balay       for (i=0; i<n;) {
787a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
78826fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
78926fbe8dcSKarl Rupp           if (row[j] != rstart) break;
79026fbe8dcSKarl Rupp         }
791a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
792a30f8f8cSSatish Balay         else       ncols = n-i;
793a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
7944b4eb8d3SJed Brown         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
795a30f8f8cSSatish Balay         i    = j;
796a30f8f8cSSatish Balay       }
797a30f8f8cSSatish Balay     }
798a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
799a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
800a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
801a30f8f8cSSatish Balay        restore the original flags */
802a30f8f8cSSatish Balay     r1 = baij->roworiented;
803a30f8f8cSSatish Balay     r2 = a->roworiented;
80491c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
80526fbe8dcSKarl Rupp 
806a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
807a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
80826fbe8dcSKarl Rupp 
80991c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
810a30f8f8cSSatish Balay     while (1) {
811a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
812a30f8f8cSSatish Balay       if (!flg) break;
813a30f8f8cSSatish Balay 
814a30f8f8cSSatish Balay       for (i=0; i<n;) {
815a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
81626fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
81726fbe8dcSKarl Rupp           if (row[j] != rstart) break;
81826fbe8dcSKarl Rupp         }
819a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
820a30f8f8cSSatish Balay         else       ncols = n-i;
8214b4eb8d3SJed Brown         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
822a30f8f8cSSatish Balay         i    = j;
823a30f8f8cSSatish Balay       }
824a30f8f8cSSatish Balay     }
825a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
82626fbe8dcSKarl Rupp 
827a30f8f8cSSatish Balay     baij->roworiented = r1;
828a30f8f8cSSatish Balay     a->roworiented    = r2;
82926fbe8dcSKarl Rupp 
83091c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
831a30f8f8cSSatish Balay   }
832a30f8f8cSSatish Balay 
833a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
834a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
835a30f8f8cSSatish Balay 
836a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
837a30f8f8cSSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
838a30f8f8cSSatish Balay   /*
839a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
840a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
841a30f8f8cSSatish Balay   */
842a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
843b2566f29SBarry Smith     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
844a30f8f8cSSatish Balay     if (mat->was_assembled && !other_disassembled) {
845ab9863d7SBarry Smith       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
846a30f8f8cSSatish Balay     }
847a30f8f8cSSatish Balay   }
848a30f8f8cSSatish Balay 
849a30f8f8cSSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
85040781036SHong Zhang     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
851a30f8f8cSSatish Balay   }
852a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
853a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
854a30f8f8cSSatish Balay 
85574ed9c26SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
85626fbe8dcSKarl Rupp 
857a30f8f8cSSatish Balay   baij->rowvalues = 0;
8584f9cfa9eSBarry Smith 
8594f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
8604f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
861e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
862b2566f29SBarry Smith     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
863e56f5c9eSBarry Smith   }
864a30f8f8cSSatish Balay   PetscFunctionReturn(0);
865a30f8f8cSSatish Balay }
866a30f8f8cSSatish Balay 
867dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
8689804daf3SBarry Smith #include <petscdraw.h>
8696849ba73SBarry Smith static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870a30f8f8cSSatish Balay {
871a30f8f8cSSatish Balay   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
872dfbe8321SBarry Smith   PetscErrorCode    ierr;
873d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
8747da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
875ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
876b0a32e0cSBarry Smith   PetscViewer       sviewer;
877f3ef73ceSBarry Smith   PetscViewerFormat format;
878a30f8f8cSSatish Balay 
879a30f8f8cSSatish Balay   PetscFunctionBegin;
880251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
881251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
88232077d6dSBarry Smith   if (iascii) {
883b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885a30f8f8cSSatish Balay       MatInfo info;
886ce94432eSBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
887a30f8f8cSSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
8881575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
889b1e9c6f1SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
890a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
891e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
892a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
893e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
894b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
8951575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89607d81ca4SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
897a30f8f8cSSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
898a30f8f8cSSatish Balay       PetscFunctionReturn(0);
899fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
90077431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
901a30f8f8cSSatish Balay       PetscFunctionReturn(0);
902c1490034SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
903c1490034SHong Zhang       PetscFunctionReturn(0);
904a30f8f8cSSatish Balay     }
905a30f8f8cSSatish Balay   }
906a30f8f8cSSatish Balay 
907a30f8f8cSSatish Balay   if (isdraw) {
908b0a32e0cSBarry Smith     PetscDraw draw;
909ace3abfcSBarry Smith     PetscBool isnull;
910b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
91145f3bb6eSLisandro Dalcin     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
91245f3bb6eSLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
913a30f8f8cSSatish Balay   }
914a30f8f8cSSatish Balay 
9157da1fb6eSBarry Smith   {
916a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
917a30f8f8cSSatish Balay     Mat          A;
91865d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
91965d70643SHong Zhang     Mat_SeqBAIJ  *Bloc;
920d0f46423SBarry Smith     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
921a30f8f8cSSatish Balay     MatScalar    *a;
9223e219373SBarry Smith     const char   *matname;
923a30f8f8cSSatish Balay 
924f204ca49SKris Buschelman     /* Should this be the same type as mat? */
925ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
926a30f8f8cSSatish Balay     if (!rank) {
927f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
928a30f8f8cSSatish Balay     } else {
929f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
930a30f8f8cSSatish Balay     }
931f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
9320298fd71SBarry Smith     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
9332b82e772SSatish Balay     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
9343bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
935a30f8f8cSSatish Balay 
936a30f8f8cSSatish Balay     /* copy over the A part */
93765d70643SHong Zhang     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
938a30f8f8cSSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
939785e854fSJed Brown     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
940a30f8f8cSSatish Balay 
941a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
942e9f7bc9eSHong Zhang       rvals[0] = bs*(baij->rstartbs + i);
94326fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
944a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
945e9f7bc9eSHong Zhang         col = (baij->cstartbs+aj[j])*bs;
946a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
947dd6ea824SBarry Smith           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
94826fbe8dcSKarl Rupp           col++;
94926fbe8dcSKarl Rupp           a += bs;
950a30f8f8cSSatish Balay         }
951a30f8f8cSSatish Balay       }
952a30f8f8cSSatish Balay     }
953a30f8f8cSSatish Balay     /* copy over the B part */
95465d70643SHong Zhang     Bloc = (Mat_SeqBAIJ*)baij->B->data;
95565d70643SHong Zhang     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
956a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
957e9f7bc9eSHong Zhang 
958e9f7bc9eSHong Zhang       rvals[0] = bs*(baij->rstartbs + i);
95926fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
960a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
961a30f8f8cSSatish Balay         col = baij->garray[aj[j]]*bs;
962a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
963799bb49cSHong Zhang           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
96426fbe8dcSKarl Rupp           col++;
96526fbe8dcSKarl Rupp           a += bs;
966a30f8f8cSSatish Balay         }
967a30f8f8cSSatish Balay       }
968a30f8f8cSSatish Balay     }
969a30f8f8cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
970a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972a30f8f8cSSatish Balay     /*
973a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
974b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
975a30f8f8cSSatish Balay     */
9763f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
977ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
9783e219373SBarry Smith     if (!rank) {
979ade3a672SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
980383922c3SLisandro Dalcin       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
981a30f8f8cSSatish Balay     }
9823f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
9831575c14dSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
9846bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
985a30f8f8cSSatish Balay   }
986a30f8f8cSSatish Balay   PetscFunctionReturn(0);
987a30f8f8cSSatish Balay }
988a30f8f8cSSatish Balay 
989*618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
990*618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
991d1654148SHong Zhang 
992dfbe8321SBarry Smith PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
993a30f8f8cSSatish Balay {
994dfbe8321SBarry Smith   PetscErrorCode ierr;
995ace3abfcSBarry Smith   PetscBool      iascii,isdraw,issocket,isbinary;
996a30f8f8cSSatish Balay 
997a30f8f8cSSatish Balay   PetscFunctionBegin;
998251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
999251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1000251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1001251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1002d1654148SHong Zhang   if (iascii || isdraw || issocket) {
1003a30f8f8cSSatish Balay     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1004d1654148SHong Zhang   } else if (isbinary) {
1005d1654148SHong Zhang     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1006a30f8f8cSSatish Balay   }
1007a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1008a30f8f8cSSatish Balay }
1009a30f8f8cSSatish Balay 
1010dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1011a30f8f8cSSatish Balay {
1012a30f8f8cSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1013dfbe8321SBarry Smith   PetscErrorCode ierr;
1014a30f8f8cSSatish Balay 
1015a30f8f8cSSatish Balay   PetscFunctionBegin;
1016a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
1017d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1018a30f8f8cSSatish Balay #endif
1019a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1020a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
10216bf464f9SBarry Smith   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
10226bf464f9SBarry Smith   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1023a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
10246bc0bbbfSBarry Smith   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1025a30f8f8cSSatish Balay #else
102605b42c5fSBarry Smith   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1027a30f8f8cSSatish Balay #endif
102805b42c5fSBarry Smith   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
10296bf464f9SBarry Smith   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
10306bf464f9SBarry Smith   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
10316bf464f9SBarry Smith   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
10326bf464f9SBarry Smith   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
10336bf464f9SBarry Smith   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
10346bf464f9SBarry Smith   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
10356bf464f9SBarry Smith   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
10366bf464f9SBarry Smith   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
10375755ff91SHong Zhang   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
103805b42c5fSBarry Smith   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
103905b42c5fSBarry Smith   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
10406bf464f9SBarry Smith   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
10416bf464f9SBarry Smith   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
10426bf464f9SBarry Smith   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1043ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
104405b42c5fSBarry Smith   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1045a30f8f8cSSatish Balay #endif
104659ffdab8SBarry Smith   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
104759ffdab8SBarry Smith   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1048899cda47SBarry Smith   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1049bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1050901853e0SKris Buschelman 
1051dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1052bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1053bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1054bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1055d2c30c80SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
10566214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
10576214f412SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
10586214f412SHong Zhang #endif
1059b147fbf3SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1060b147fbf3SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1061a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1062a30f8f8cSSatish Balay }
1063a30f8f8cSSatish Balay 
1064547795f9SHong Zhang PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1065547795f9SHong Zhang {
1066547795f9SHong Zhang   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1067547795f9SHong Zhang   PetscErrorCode    ierr;
1068eb1ec7c1SStefano Zampini   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
10696de40e93SBarry Smith   PetscScalar       *from;
10706de40e93SBarry Smith   const PetscScalar *x;
1071547795f9SHong Zhang 
1072547795f9SHong Zhang   PetscFunctionBegin;
1073547795f9SHong Zhang   /* diagonal part */
1074547795f9SHong Zhang   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1075547795f9SHong Zhang   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1076547795f9SHong Zhang 
1077547795f9SHong Zhang   /* subdiagonal part */
1078547795f9SHong Zhang   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1079547795f9SHong Zhang 
1080547795f9SHong Zhang   /* copy x into the vec slvec0 */
1081547795f9SHong Zhang   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
10826de40e93SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1083547795f9SHong Zhang 
1084580bdb30SBarry Smith   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1085547795f9SHong Zhang   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
10866de40e93SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1087547795f9SHong Zhang 
1088547795f9SHong Zhang   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089547795f9SHong Zhang   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090547795f9SHong Zhang   /* supperdiagonal part */
1091547795f9SHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1092547795f9SHong Zhang   PetscFunctionReturn(0);
1093547795f9SHong Zhang }
1094547795f9SHong Zhang 
1095dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1096a9d4b620SHong Zhang {
1097a9d4b620SHong Zhang   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1098dfbe8321SBarry Smith   PetscErrorCode    ierr;
1099eb1ec7c1SStefano Zampini   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1100d9ca1df4SBarry Smith   PetscScalar       *from;
1101d9ca1df4SBarry Smith   const PetscScalar *x;
1102a9d4b620SHong Zhang 
1103a9d4b620SHong Zhang   PetscFunctionBegin;
1104a9d4b620SHong Zhang   /* diagonal part */
1105a9d4b620SHong Zhang   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1106fa22f6d0SBarry Smith   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1107a9d4b620SHong Zhang 
1108a9d4b620SHong Zhang   /* subdiagonal part */
1109a9d4b620SHong Zhang   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1110fc165ae2SBarry Smith 
1111a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
11121ebc52fbSHong Zhang   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1113d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1114a9d4b620SHong Zhang 
1115580bdb30SBarry Smith   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1116fc165ae2SBarry Smith   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1117d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1118fc165ae2SBarry Smith 
1119fc165ae2SBarry Smith   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120ca9f406cSSatish Balay   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121a9d4b620SHong Zhang   /* supperdiagonal part */
1122a9d4b620SHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1123a9d4b620SHong Zhang   PetscFunctionReturn(0);
1124a9d4b620SHong Zhang }
1125a9d4b620SHong Zhang 
1126dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1127a30f8f8cSSatish Balay {
1128a30f8f8cSSatish Balay   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1129dfbe8321SBarry Smith   PetscErrorCode ierr;
11301302d50aSBarry Smith   PetscInt       nt;
1131a30f8f8cSSatish Balay 
1132a30f8f8cSSatish Balay   PetscFunctionBegin;
1133a30f8f8cSSatish Balay   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1134e7e72b3dSBarry Smith   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1135e7e72b3dSBarry Smith 
1136a30f8f8cSSatish Balay   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1137e7e72b3dSBarry Smith   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
113865d70643SHong Zhang 
1139ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140b941877fSHong Zhang   /* do diagonal part */
1141b941877fSHong Zhang   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1142b941877fSHong Zhang   /* do supperdiagonal part */
1143ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144b941877fSHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1145b941877fSHong Zhang   /* do subdiagonal part */
1146b941877fSHong Zhang   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1147ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1149a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1150a30f8f8cSSatish Balay }
1151a30f8f8cSSatish Balay 
1152eb1ec7c1SStefano Zampini PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1153eb1ec7c1SStefano Zampini {
1154eb1ec7c1SStefano Zampini   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1155eb1ec7c1SStefano Zampini   PetscErrorCode    ierr;
1156eb1ec7c1SStefano Zampini   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1157eb1ec7c1SStefano Zampini   PetscScalar       *from,zero=0.0;
1158eb1ec7c1SStefano Zampini   const PetscScalar *x;
1159eb1ec7c1SStefano Zampini 
1160eb1ec7c1SStefano Zampini   PetscFunctionBegin;
1161eb1ec7c1SStefano Zampini   /* diagonal part */
1162eb1ec7c1SStefano Zampini   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1163eb1ec7c1SStefano Zampini   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1164eb1ec7c1SStefano Zampini 
1165eb1ec7c1SStefano Zampini   /* subdiagonal part */
1166eb1ec7c1SStefano Zampini   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1167eb1ec7c1SStefano Zampini 
1168eb1ec7c1SStefano Zampini   /* copy x into the vec slvec0 */
1169eb1ec7c1SStefano Zampini   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1170eb1ec7c1SStefano Zampini   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1171eb1ec7c1SStefano Zampini   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1172eb1ec7c1SStefano Zampini   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1173eb1ec7c1SStefano Zampini 
1174eb1ec7c1SStefano Zampini   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175eb1ec7c1SStefano Zampini   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1176eb1ec7c1SStefano Zampini   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177eb1ec7c1SStefano Zampini 
1178eb1ec7c1SStefano Zampini   /* supperdiagonal part */
1179eb1ec7c1SStefano Zampini   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1180eb1ec7c1SStefano Zampini   PetscFunctionReturn(0);
1181eb1ec7c1SStefano Zampini }
1182eb1ec7c1SStefano Zampini 
1183dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1184a30f8f8cSSatish Balay {
1185de8b6608SHong Zhang   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1186dfbe8321SBarry Smith   PetscErrorCode    ierr;
1187d0f46423SBarry Smith   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1188d9ca1df4SBarry Smith   PetscScalar       *from,zero=0.0;
1189d9ca1df4SBarry Smith   const PetscScalar *x;
1190a9d4b620SHong Zhang 
1191a9d4b620SHong Zhang   PetscFunctionBegin;
1192a9d4b620SHong Zhang   /* diagonal part */
1193a9d4b620SHong Zhang   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
11942dcb1b2aSMatthew Knepley   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1195a9d4b620SHong Zhang 
1196a9d4b620SHong Zhang   /* subdiagonal part */
1197a9d4b620SHong Zhang   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1198a9d4b620SHong Zhang 
1199a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
12001ebc52fbSHong Zhang   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1201d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1202580bdb30SBarry Smith   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
12031ebc52fbSHong Zhang   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1204a9d4b620SHong Zhang 
1205ca9f406cSSatish Balay   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1207ca9f406cSSatish Balay   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1208a9d4b620SHong Zhang 
1209a9d4b620SHong Zhang   /* supperdiagonal part */
1210a9d4b620SHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1211a9d4b620SHong Zhang   PetscFunctionReturn(0);
1212a9d4b620SHong Zhang }
1213a9d4b620SHong Zhang 
1214dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1215a9d4b620SHong Zhang {
1216a9d4b620SHong Zhang   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1217dfbe8321SBarry Smith   PetscErrorCode ierr;
1218a30f8f8cSSatish Balay 
1219a30f8f8cSSatish Balay   PetscFunctionBegin;
1220ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1221b941877fSHong Zhang   /* do diagonal part */
1222b941877fSHong Zhang   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1223b941877fSHong Zhang   /* do supperdiagonal part */
1224ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225de8b6608SHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1226de8b6608SHong Zhang 
1227b941877fSHong Zhang   /* do subdiagonal part */
1228a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1229ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1231a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1232a30f8f8cSSatish Balay }
1233a30f8f8cSSatish Balay 
1234a30f8f8cSSatish Balay /*
1235a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1236a30f8f8cSSatish Balay    diagonal block
1237a30f8f8cSSatish Balay */
1238dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1239a30f8f8cSSatish Balay {
1240a30f8f8cSSatish Balay   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1241dfbe8321SBarry Smith   PetscErrorCode ierr;
1242a30f8f8cSSatish Balay 
1243a30f8f8cSSatish Balay   PetscFunctionBegin;
1244e32f2f54SBarry Smith   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1245a30f8f8cSSatish Balay   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1246a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1247a30f8f8cSSatish Balay }
1248a30f8f8cSSatish Balay 
1249f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1250a30f8f8cSSatish Balay {
1251a30f8f8cSSatish Balay   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1252dfbe8321SBarry Smith   PetscErrorCode ierr;
1253a30f8f8cSSatish Balay 
1254a30f8f8cSSatish Balay   PetscFunctionBegin;
1255f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1256f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1257a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1258a30f8f8cSSatish Balay }
1259a30f8f8cSSatish Balay 
12601302d50aSBarry Smith PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261a30f8f8cSSatish Balay {
1262d0d4cfc2SHong Zhang   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1263d0d4cfc2SHong Zhang   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264d0d4cfc2SHong Zhang   PetscErrorCode ierr;
1265d0f46423SBarry Smith   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1266d0f46423SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1267899cda47SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1268d0d4cfc2SHong Zhang 
1269a30f8f8cSSatish Balay   PetscFunctionBegin;
1270e32f2f54SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271d0d4cfc2SHong Zhang   mat->getrowactive = PETSC_TRUE;
1272d0d4cfc2SHong Zhang 
1273d0d4cfc2SHong Zhang   if (!mat->rowvalues && (idx || v)) {
1274d0d4cfc2SHong Zhang     /*
1275d0d4cfc2SHong Zhang         allocate enough space to hold information from the longest row.
1276d0d4cfc2SHong Zhang     */
1277d0d4cfc2SHong Zhang     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1278d0d4cfc2SHong Zhang     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1279d0d4cfc2SHong Zhang     PetscInt     max = 1,mbs = mat->mbs,tmp;
1280d0d4cfc2SHong Zhang     for (i=0; i<mbs; i++) {
1281d0d4cfc2SHong Zhang       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
128226fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1283d0d4cfc2SHong Zhang     }
1284dcca6d9dSJed Brown     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1285d0d4cfc2SHong Zhang   }
1286d0d4cfc2SHong Zhang 
1287e7e72b3dSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1288d0d4cfc2SHong Zhang   lrow = row - brstart;  /* local row index */
1289d0d4cfc2SHong Zhang 
1290d0d4cfc2SHong Zhang   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291d0d4cfc2SHong Zhang   if (!v)   {pvA = 0; pvB = 0;}
1292d0d4cfc2SHong Zhang   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293d0d4cfc2SHong Zhang   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294d0d4cfc2SHong Zhang   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295d0d4cfc2SHong Zhang   nztot = nzA + nzB;
1296d0d4cfc2SHong Zhang 
1297d0d4cfc2SHong Zhang   cmap = mat->garray;
1298d0d4cfc2SHong Zhang   if (v  || idx) {
1299d0d4cfc2SHong Zhang     if (nztot) {
1300d0d4cfc2SHong Zhang       /* Sort by increasing column numbers, assuming A and B already sorted */
1301d0d4cfc2SHong Zhang       PetscInt imark = -1;
1302d0d4cfc2SHong Zhang       if (v) {
1303d0d4cfc2SHong Zhang         *v = v_p = mat->rowvalues;
1304d0d4cfc2SHong Zhang         for (i=0; i<nzB; i++) {
1305d0d4cfc2SHong Zhang           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1306d0d4cfc2SHong Zhang           else break;
1307d0d4cfc2SHong Zhang         }
1308d0d4cfc2SHong Zhang         imark = i;
1309d0d4cfc2SHong Zhang         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310d0d4cfc2SHong Zhang         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311d0d4cfc2SHong Zhang       }
1312d0d4cfc2SHong Zhang       if (idx) {
1313d0d4cfc2SHong Zhang         *idx = idx_p = mat->rowindices;
1314d0d4cfc2SHong Zhang         if (imark > -1) {
1315d0d4cfc2SHong Zhang           for (i=0; i<imark; i++) {
1316d0d4cfc2SHong Zhang             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317d0d4cfc2SHong Zhang           }
1318d0d4cfc2SHong Zhang         } else {
1319d0d4cfc2SHong Zhang           for (i=0; i<nzB; i++) {
132026fbe8dcSKarl Rupp             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1321d0d4cfc2SHong Zhang             else break;
1322d0d4cfc2SHong Zhang           }
1323d0d4cfc2SHong Zhang           imark = i;
1324d0d4cfc2SHong Zhang         }
1325d0d4cfc2SHong Zhang         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1326d0d4cfc2SHong Zhang         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1327d0d4cfc2SHong Zhang       }
1328d0d4cfc2SHong Zhang     } else {
1329d0d4cfc2SHong Zhang       if (idx) *idx = 0;
1330d0d4cfc2SHong Zhang       if (v)   *v   = 0;
1331d0d4cfc2SHong Zhang     }
1332d0d4cfc2SHong Zhang   }
1333d0d4cfc2SHong Zhang   *nz  = nztot;
1334d0d4cfc2SHong Zhang   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335d0d4cfc2SHong Zhang   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1337a30f8f8cSSatish Balay }
1338a30f8f8cSSatish Balay 
13391302d50aSBarry Smith PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340a30f8f8cSSatish Balay {
1341a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1342a30f8f8cSSatish Balay 
1343a30f8f8cSSatish Balay   PetscFunctionBegin;
1344e7e72b3dSBarry Smith   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1345a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1346a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1347a30f8f8cSSatish Balay }
1348a30f8f8cSSatish Balay 
1349d0d4cfc2SHong Zhang PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1350d0d4cfc2SHong Zhang {
1351d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1352d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1353d0d4cfc2SHong Zhang 
1354d0d4cfc2SHong Zhang   PetscFunctionBegin;
1355d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_TRUE;
1356d0d4cfc2SHong Zhang   PetscFunctionReturn(0);
1357d0d4cfc2SHong Zhang }
1358d0d4cfc2SHong Zhang PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1359d0d4cfc2SHong Zhang {
1360d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1361d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1362d0d4cfc2SHong Zhang 
1363d0d4cfc2SHong Zhang   PetscFunctionBegin;
1364d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_FALSE;
1365d0d4cfc2SHong Zhang   PetscFunctionReturn(0);
1366d0d4cfc2SHong Zhang }
1367d0d4cfc2SHong Zhang 
136899cafbc1SBarry Smith PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
136999cafbc1SBarry Smith {
137099cafbc1SBarry Smith   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
137199cafbc1SBarry Smith   PetscErrorCode ierr;
137299cafbc1SBarry Smith 
137399cafbc1SBarry Smith   PetscFunctionBegin;
137499cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
137599cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
137699cafbc1SBarry Smith   PetscFunctionReturn(0);
137799cafbc1SBarry Smith }
137899cafbc1SBarry Smith 
137999cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
138099cafbc1SBarry Smith {
138199cafbc1SBarry Smith   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
138299cafbc1SBarry Smith   PetscErrorCode ierr;
138399cafbc1SBarry Smith 
138499cafbc1SBarry Smith   PetscFunctionBegin;
138599cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
138699cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
138799cafbc1SBarry Smith   PetscFunctionReturn(0);
138899cafbc1SBarry Smith }
138999cafbc1SBarry Smith 
13907dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
139136032a97SHong Zhang    Input: isrow       - distributed(parallel),
139236032a97SHong Zhang           iscol_local - locally owned (seq)
139336032a97SHong Zhang */
139436032a97SHong Zhang PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
13958f46ffcaSHong Zhang {
13968f46ffcaSHong Zhang   PetscErrorCode ierr;
13978f46ffcaSHong Zhang   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
13988f46ffcaSHong Zhang   const PetscInt *ptr1,*ptr2;
139936032a97SHong Zhang 
140036032a97SHong Zhang   PetscFunctionBegin;
14018f46ffcaSHong Zhang   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
14028f46ffcaSHong Zhang   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
14031098a8e8SHong Zhang   if (sz1 > sz2) {
14041098a8e8SHong Zhang     *flg = PETSC_FALSE;
14051098a8e8SHong Zhang     PetscFunctionReturn(0);
14061098a8e8SHong Zhang   }
14078f46ffcaSHong Zhang 
14088f46ffcaSHong Zhang   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
14098f46ffcaSHong Zhang   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
14108f46ffcaSHong Zhang 
14118f46ffcaSHong Zhang   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
14128f46ffcaSHong Zhang   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1413580bdb30SBarry Smith   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1414580bdb30SBarry Smith   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
14158f46ffcaSHong Zhang   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
14168f46ffcaSHong Zhang   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
14178f46ffcaSHong Zhang 
14188f46ffcaSHong Zhang   nmatch=0;
14198f46ffcaSHong Zhang   k     = 0;
14208f46ffcaSHong Zhang   for (i=0; i<sz1; i++){
14218f46ffcaSHong Zhang     for (j=k; j<sz2; j++){
14228f46ffcaSHong Zhang       if (a1[i] == a2[j]) {
14238f46ffcaSHong Zhang         k = j; nmatch++;
14248f46ffcaSHong Zhang         break;
14258f46ffcaSHong Zhang       }
14268f46ffcaSHong Zhang     }
14278f46ffcaSHong Zhang   }
14288f46ffcaSHong Zhang   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
14298f46ffcaSHong Zhang   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
14308f46ffcaSHong Zhang   ierr = PetscFree(a1);CHKERRQ(ierr);
14318f46ffcaSHong Zhang   ierr = PetscFree(a2);CHKERRQ(ierr);
14321098a8e8SHong Zhang   if (nmatch < sz1) {
14331098a8e8SHong Zhang     *flg = PETSC_FALSE;
14341098a8e8SHong Zhang   } else {
14351098a8e8SHong Zhang     *flg = PETSC_TRUE;
14361098a8e8SHong Zhang   }
143736032a97SHong Zhang   PetscFunctionReturn(0);
14388f46ffcaSHong Zhang }
143936032a97SHong Zhang 
14407dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
144136032a97SHong Zhang {
144236032a97SHong Zhang   PetscErrorCode ierr;
144336032a97SHong Zhang   IS             iscol_local;
144436032a97SHong Zhang   PetscInt       csize;
144536032a97SHong Zhang   PetscBool      isequal;
144636032a97SHong Zhang 
144736032a97SHong Zhang   PetscFunctionBegin;
144836032a97SHong Zhang   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
144936032a97SHong Zhang   if (call == MAT_REUSE_MATRIX) {
145036032a97SHong Zhang     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
145136032a97SHong Zhang     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
145236032a97SHong Zhang   } else {
145336032a97SHong Zhang     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
145436032a97SHong Zhang     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
145536032a97SHong Zhang     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
14568f46ffcaSHong Zhang   }
14578f46ffcaSHong Zhang 
14587dae84e0SHong Zhang   /* now call MatCreateSubMatrix_MPIBAIJ() */
14597dae84e0SHong Zhang   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
14608f46ffcaSHong Zhang   if (call == MAT_INITIAL_MATRIX) {
14618f46ffcaSHong Zhang     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
14628f46ffcaSHong Zhang     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
14638f46ffcaSHong Zhang   }
14648f46ffcaSHong Zhang   PetscFunctionReturn(0);
14658f46ffcaSHong Zhang }
14668f46ffcaSHong Zhang 
1467dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1468a30f8f8cSSatish Balay {
1469a30f8f8cSSatish Balay   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1470dfbe8321SBarry Smith   PetscErrorCode ierr;
1471a30f8f8cSSatish Balay 
1472a30f8f8cSSatish Balay   PetscFunctionBegin;
1473a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1474a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1475a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1476a30f8f8cSSatish Balay }
1477a30f8f8cSSatish Balay 
1478dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1479a30f8f8cSSatish Balay {
1480a30f8f8cSSatish Balay   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1481a30f8f8cSSatish Balay   Mat            A  = a->A,B = a->B;
1482dfbe8321SBarry Smith   PetscErrorCode ierr;
14833966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
1484a30f8f8cSSatish Balay 
1485a30f8f8cSSatish Balay   PetscFunctionBegin;
1486d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
148726fbe8dcSKarl Rupp 
1488a30f8f8cSSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
148926fbe8dcSKarl Rupp 
1490a30f8f8cSSatish Balay   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1491a30f8f8cSSatish Balay   isend[3] = info->memory;  isend[4] = info->mallocs;
149226fbe8dcSKarl Rupp 
1493a30f8f8cSSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
149426fbe8dcSKarl Rupp 
1495a30f8f8cSSatish Balay   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1496a30f8f8cSSatish Balay   isend[3] += info->memory;  isend[4] += info->mallocs;
1497a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1498a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1499a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1500a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1501a30f8f8cSSatish Balay     info->memory       = isend[3];
1502a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1503a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
15043966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
150526fbe8dcSKarl Rupp 
1506a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1507a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1508a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1509a30f8f8cSSatish Balay     info->memory       = irecv[3];
1510a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1511a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
15123966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
151326fbe8dcSKarl Rupp 
1514a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1515a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1516a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1517a30f8f8cSSatish Balay     info->memory       = irecv[3];
1518a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1519f23aa3ddSBarry Smith   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1520a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1521a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1522a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1523a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1524a30f8f8cSSatish Balay }
1525a30f8f8cSSatish Balay 
1526ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1527a30f8f8cSSatish Balay {
1528a30f8f8cSSatish Balay   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1529d0d4cfc2SHong Zhang   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1530dfbe8321SBarry Smith   PetscErrorCode ierr;
1531a30f8f8cSSatish Balay 
1532a30f8f8cSSatish Balay   PetscFunctionBegin;
1533e98b92d7SKris Buschelman   switch (op) {
1534512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1535e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
153628b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1537a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1538c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1539e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
154043674050SBarry Smith     MatCheckPreallocated(A,1);
15414e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15424e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1543e98b92d7SKris Buschelman     break;
1544e98b92d7SKris Buschelman   case MAT_ROW_ORIENTED:
154543674050SBarry Smith     MatCheckPreallocated(A,1);
15464e0d8c25SBarry Smith     a->roworiented = flg;
154726fbe8dcSKarl Rupp 
15484e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15494e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1550e98b92d7SKris Buschelman     break;
15514e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
1552071fcb05SBarry Smith   case MAT_SORTED_FULL:
1553290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1554e98b92d7SKris Buschelman     break;
1555e98b92d7SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15564e0d8c25SBarry Smith     a->donotstash = flg;
1557e98b92d7SKris Buschelman     break;
1558e98b92d7SKris Buschelman   case MAT_USE_HASH_TABLE:
15594e0d8c25SBarry Smith     a->ht_flag = flg;
1560e98b92d7SKris Buschelman     break;
15619a4540c5SBarry Smith   case MAT_HERMITIAN:
156243674050SBarry Smith     MatCheckPreallocated(A,1);
1563eeffb40dSHong Zhang     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15640f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1565eb1ec7c1SStefano Zampini     if (flg) { /* need different mat-vec ops */
1566547795f9SHong Zhang       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1567eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1568eb1ec7c1SStefano Zampini       A->ops->multtranspose    = NULL;
1569eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = NULL;
1570eb1ec7c1SStefano Zampini       A->symmetric = PETSC_FALSE;
1571eb1ec7c1SStefano Zampini     }
15720f2140c7SStefano Zampini #endif
1573eeffb40dSHong Zhang     break;
1574ffa07934SHong Zhang   case MAT_SPD:
157577e54ba9SKris Buschelman   case MAT_SYMMETRIC:
157643674050SBarry Smith     MatCheckPreallocated(A,1);
1577eeffb40dSHong Zhang     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1578eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1579eb1ec7c1SStefano Zampini     if (flg) { /* restore to use default mat-vec ops */
1580eb1ec7c1SStefano Zampini       A->ops->mult             = MatMult_MPISBAIJ;
1581eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1582eb1ec7c1SStefano Zampini       A->ops->multtranspose    = MatMult_MPISBAIJ;
1583eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1584eb1ec7c1SStefano Zampini     }
1585eb1ec7c1SStefano Zampini #endif
1586eeffb40dSHong Zhang     break;
158777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
158843674050SBarry Smith     MatCheckPreallocated(A,1);
1589eeffb40dSHong Zhang     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1590eeffb40dSHong Zhang     break;
15919a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1592e32f2f54SBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1593290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
159477e54ba9SKris Buschelman     break;
1595d0d4cfc2SHong Zhang   case MAT_IGNORE_LOWER_TRIANGULAR:
15964e0d8c25SBarry Smith     aA->ignore_ltriangular = flg;
1597d0d4cfc2SHong Zhang     break;
1598d0d4cfc2SHong Zhang   case MAT_ERROR_LOWER_TRIANGULAR:
15994e0d8c25SBarry Smith     aA->ignore_ltriangular = flg;
1600d0d4cfc2SHong Zhang     break;
1601d0d4cfc2SHong Zhang   case MAT_GETROW_UPPERTRIANGULAR:
16024e0d8c25SBarry Smith     aA->getrow_utriangular = flg;
1603d0d4cfc2SHong Zhang     break;
1604e98b92d7SKris Buschelman   default:
1605e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1606a30f8f8cSSatish Balay   }
1607a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1608a30f8f8cSSatish Balay }
1609a30f8f8cSSatish Balay 
1610fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1611a30f8f8cSSatish Balay {
1612dfbe8321SBarry Smith   PetscErrorCode ierr;
16136e111a19SKarl Rupp 
1614a30f8f8cSSatish Balay   PetscFunctionBegin;
1615cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
1616999d9058SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1617cf37664fSBarry Smith   }  else if (reuse == MAT_REUSE_MATRIX) {
1618cf37664fSBarry Smith     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1619fc4dec0aSBarry Smith   }
16208115998fSBarry Smith   PetscFunctionReturn(0);
1621a30f8f8cSSatish Balay }
1622a30f8f8cSSatish Balay 
1623dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1624a30f8f8cSSatish Balay {
1625a30f8f8cSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1626a30f8f8cSSatish Balay   Mat            a     = baij->A, b=baij->B;
1627dfbe8321SBarry Smith   PetscErrorCode ierr;
16285e90f9d9SHong Zhang   PetscInt       nv,m,n;
1629ace3abfcSBarry Smith   PetscBool      flg;
1630a30f8f8cSSatish Balay 
1631a30f8f8cSSatish Balay   PetscFunctionBegin;
1632a30f8f8cSSatish Balay   if (ll != rr) {
1633b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1634e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1635a30f8f8cSSatish Balay   }
1636b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
1637b3bf805bSHong Zhang 
16385e90f9d9SHong Zhang   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1639e32f2f54SBarry Smith   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1640b3bf805bSHong Zhang 
16415e90f9d9SHong Zhang   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1642e32f2f54SBarry Smith   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
16435e90f9d9SHong Zhang 
1644ca9f406cSSatish Balay   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
16455e90f9d9SHong Zhang 
16465e90f9d9SHong Zhang   /* left diagonalscale the off-diagonal part */
16470298fd71SBarry Smith   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
16485e90f9d9SHong Zhang 
16495e90f9d9SHong Zhang   /* scale the diagonal part */
1650a30f8f8cSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1651a30f8f8cSSatish Balay 
16525e90f9d9SHong Zhang   /* right diagonalscale the off-diagonal part */
1653ca9f406cSSatish Balay   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
16540298fd71SBarry Smith   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1655a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1656a30f8f8cSSatish Balay }
1657a30f8f8cSSatish Balay 
1658dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1659a30f8f8cSSatish Balay {
1660f3566a2aSHong Zhang   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1661dfbe8321SBarry Smith   PetscErrorCode ierr;
1662a30f8f8cSSatish Balay 
1663a30f8f8cSSatish Balay   PetscFunctionBegin;
1664a30f8f8cSSatish Balay   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1665a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1666a30f8f8cSSatish Balay }
1667a30f8f8cSSatish Balay 
16686849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1669a30f8f8cSSatish Balay 
1670ace3abfcSBarry Smith PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1671a30f8f8cSSatish Balay {
1672a30f8f8cSSatish Balay   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1673a30f8f8cSSatish Balay   Mat            a,b,c,d;
1674ace3abfcSBarry Smith   PetscBool      flg;
1675dfbe8321SBarry Smith   PetscErrorCode ierr;
1676a30f8f8cSSatish Balay 
1677a30f8f8cSSatish Balay   PetscFunctionBegin;
1678a30f8f8cSSatish Balay   a = matA->A; b = matA->B;
1679a30f8f8cSSatish Balay   c = matB->A; d = matB->B;
1680a30f8f8cSSatish Balay 
1681a30f8f8cSSatish Balay   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1682abc0a331SBarry Smith   if (flg) {
1683a30f8f8cSSatish Balay     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1684a30f8f8cSSatish Balay   }
1685b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1686a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1687a30f8f8cSSatish Balay }
1688a30f8f8cSSatish Balay 
16893c896bc6SHong Zhang PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
16903c896bc6SHong Zhang {
16913c896bc6SHong Zhang   PetscErrorCode ierr;
16924c7a3774SStefano Zampini   PetscBool      isbaij;
16933c896bc6SHong Zhang 
16943c896bc6SHong Zhang   PetscFunctionBegin;
16954c7a3774SStefano Zampini   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
16964c7a3774SStefano Zampini   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
16973c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
16983c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1699d0d4cfc2SHong Zhang     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
17003c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1701d0d4cfc2SHong Zhang     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
17023c896bc6SHong Zhang   } else {
17034c7a3774SStefano Zampini     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
17044c7a3774SStefano Zampini     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
17054c7a3774SStefano Zampini 
17063c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
17073c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
17083c896bc6SHong Zhang   }
1709cdc753b6SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
17103c896bc6SHong Zhang   PetscFunctionReturn(0);
17113c896bc6SHong Zhang }
17123c896bc6SHong Zhang 
17134994cf47SJed Brown PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1714273d9f13SBarry Smith {
1715dfbe8321SBarry Smith   PetscErrorCode ierr;
1716273d9f13SBarry Smith 
1717273d9f13SBarry Smith   PetscFunctionBegin;
1718535b19f3SBarry Smith   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1719273d9f13SBarry Smith   PetscFunctionReturn(0);
1720273d9f13SBarry Smith }
1721a5e6ed63SBarry Smith 
17224fe895cdSHong Zhang PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
17234fe895cdSHong Zhang {
17244fe895cdSHong Zhang   PetscErrorCode ierr;
17254fe895cdSHong Zhang   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
17264fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
17274fe895cdSHong Zhang   Mat_SeqSBAIJ   *xa,*ya;
17284fe895cdSHong Zhang   Mat_SeqBAIJ    *xb,*yb;
17294fe895cdSHong Zhang 
17304fe895cdSHong Zhang   PetscFunctionBegin;
17314fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
17324fe895cdSHong Zhang     PetscScalar alpha = a;
17334fe895cdSHong Zhang     xa   = (Mat_SeqSBAIJ*)xx->A->data;
17344fe895cdSHong Zhang     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1735c5df96a5SBarry Smith     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
17368b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
17374fe895cdSHong Zhang     xb   = (Mat_SeqBAIJ*)xx->B->data;
17384fe895cdSHong Zhang     yb   = (Mat_SeqBAIJ*)yy->B->data;
1739c5df96a5SBarry Smith     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
17408b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1741a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1742ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1743ab784542SHong Zhang     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1744ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1745ab784542SHong Zhang     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
17464fe895cdSHong Zhang   } else {
17474de5dceeSHong Zhang     Mat      B;
17484de5dceeSHong Zhang     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
17494de5dceeSHong Zhang     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1750d0d4cfc2SHong Zhang     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
17514de5dceeSHong Zhang     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
17524de5dceeSHong Zhang     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
17534de5dceeSHong Zhang     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
17544de5dceeSHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
17554de5dceeSHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
17564de5dceeSHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
17574de5dceeSHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
17584de5dceeSHong Zhang     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
17594de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
17604de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
17614de5dceeSHong Zhang     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
17624de5dceeSHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
176328be2f97SBarry Smith     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
17644de5dceeSHong Zhang     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
17654de5dceeSHong Zhang     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1766d0d4cfc2SHong Zhang     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
17674de5dceeSHong Zhang     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
17684fe895cdSHong Zhang   }
17694fe895cdSHong Zhang   PetscFunctionReturn(0);
17704fe895cdSHong Zhang }
17714fe895cdSHong Zhang 
17727dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1773a5e6ed63SBarry Smith {
17746849ba73SBarry Smith   PetscErrorCode ierr;
17751302d50aSBarry Smith   PetscInt       i;
1776afebec48SHong Zhang   PetscBool      flg;
1777a5e6ed63SBarry Smith 
17786849ba73SBarry Smith   PetscFunctionBegin;
17797dae84e0SHong Zhang   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1780a5e6ed63SBarry Smith   for (i=0; i<n; i++) {
1781a5e6ed63SBarry Smith     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1782afebec48SHong Zhang     if (!flg) {
1783b2fa50c1SHong Zhang       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1784a5e6ed63SBarry Smith     }
17854dcd73b1SHong Zhang   }
1786a5e6ed63SBarry Smith   PetscFunctionReturn(0);
1787a5e6ed63SBarry Smith }
1788a5e6ed63SBarry Smith 
17897d68702bSBarry Smith PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
17907d68702bSBarry Smith {
17917d68702bSBarry Smith   PetscErrorCode ierr;
17927d68702bSBarry Smith   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
17936f33a894SBarry Smith   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
17947d68702bSBarry Smith 
17957d68702bSBarry Smith   PetscFunctionBegin;
17966f33a894SBarry Smith   if (!Y->preallocated) {
17977d68702bSBarry Smith     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
17986f33a894SBarry Smith   } else if (!aij->nz) {
1799b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
18006f33a894SBarry Smith     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1801b83222d8SBarry Smith     aij->nonew = nonew;
18027d68702bSBarry Smith   }
18037d68702bSBarry Smith   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
18047d68702bSBarry Smith   PetscFunctionReturn(0);
18057d68702bSBarry Smith }
18067d68702bSBarry Smith 
18073b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
18083b49f96aSBarry Smith {
18093b49f96aSBarry Smith   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
18103b49f96aSBarry Smith   PetscErrorCode ierr;
18113b49f96aSBarry Smith 
18123b49f96aSBarry Smith   PetscFunctionBegin;
18133b49f96aSBarry Smith   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
18143b49f96aSBarry Smith   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
18153b49f96aSBarry Smith   if (d) {
18163b49f96aSBarry Smith     PetscInt rstart;
18173b49f96aSBarry Smith     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
18183b49f96aSBarry Smith     *d += rstart/A->rmap->bs;
18193b49f96aSBarry Smith 
18203b49f96aSBarry Smith   }
18213b49f96aSBarry Smith   PetscFunctionReturn(0);
18223b49f96aSBarry Smith }
18233b49f96aSBarry Smith 
1824a5b7ff6bSBarry Smith PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1825a5b7ff6bSBarry Smith {
1826a5b7ff6bSBarry Smith   PetscFunctionBegin;
1827a5b7ff6bSBarry Smith   *a = ((Mat_MPISBAIJ*)A->data)->A;
1828a5b7ff6bSBarry Smith   PetscFunctionReturn(0);
1829a5b7ff6bSBarry Smith }
18303b49f96aSBarry Smith 
1831a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
18323964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1833a30f8f8cSSatish Balay                                        MatGetRow_MPISBAIJ,
1834a30f8f8cSSatish Balay                                        MatRestoreRow_MPISBAIJ,
1835a9d4b620SHong Zhang                                        MatMult_MPISBAIJ,
183697304618SKris Buschelman                                /*  4*/ MatMultAdd_MPISBAIJ,
1837431c96f7SBarry Smith                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1838431c96f7SBarry Smith                                        MatMultAdd_MPISBAIJ,
1839a30f8f8cSSatish Balay                                        0,
1840a30f8f8cSSatish Balay                                        0,
1841a30f8f8cSSatish Balay                                        0,
184297304618SKris Buschelman                                /* 10*/ 0,
1843a30f8f8cSSatish Balay                                        0,
1844a30f8f8cSSatish Balay                                        0,
184541f059aeSBarry Smith                                        MatSOR_MPISBAIJ,
1846a30f8f8cSSatish Balay                                        MatTranspose_MPISBAIJ,
184797304618SKris Buschelman                                /* 15*/ MatGetInfo_MPISBAIJ,
1848a30f8f8cSSatish Balay                                        MatEqual_MPISBAIJ,
1849a30f8f8cSSatish Balay                                        MatGetDiagonal_MPISBAIJ,
1850a30f8f8cSSatish Balay                                        MatDiagonalScale_MPISBAIJ,
1851a30f8f8cSSatish Balay                                        MatNorm_MPISBAIJ,
185297304618SKris Buschelman                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1853a30f8f8cSSatish Balay                                        MatAssemblyEnd_MPISBAIJ,
1854a30f8f8cSSatish Balay                                        MatSetOption_MPISBAIJ,
1855a30f8f8cSSatish Balay                                        MatZeroEntries_MPISBAIJ,
1856d519adbfSMatthew Knepley                                /* 24*/ 0,
1857a30f8f8cSSatish Balay                                        0,
1858a30f8f8cSSatish Balay                                        0,
1859a30f8f8cSSatish Balay                                        0,
1860a30f8f8cSSatish Balay                                        0,
18614994cf47SJed Brown                                /* 29*/ MatSetUp_MPISBAIJ,
1862b5df2d14SHong Zhang                                        0,
1863a30f8f8cSSatish Balay                                        0,
1864a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPISBAIJ,
1865a30f8f8cSSatish Balay                                        0,
1866d519adbfSMatthew Knepley                                /* 34*/ MatDuplicate_MPISBAIJ,
1867a30f8f8cSSatish Balay                                        0,
1868a30f8f8cSSatish Balay                                        0,
1869a30f8f8cSSatish Balay                                        0,
1870a30f8f8cSSatish Balay                                        0,
1871d519adbfSMatthew Knepley                                /* 39*/ MatAXPY_MPISBAIJ,
18727dae84e0SHong Zhang                                        MatCreateSubMatrices_MPISBAIJ,
1873d94109b8SHong Zhang                                        MatIncreaseOverlap_MPISBAIJ,
1874a30f8f8cSSatish Balay                                        MatGetValues_MPISBAIJ,
18753c896bc6SHong Zhang                                        MatCopy_MPISBAIJ,
1876d519adbfSMatthew Knepley                                /* 44*/ 0,
1877a30f8f8cSSatish Balay                                        MatScale_MPISBAIJ,
18787d68702bSBarry Smith                                        MatShift_MPISBAIJ,
1879a30f8f8cSSatish Balay                                        0,
1880a30f8f8cSSatish Balay                                        0,
1881f73d5cc4SBarry Smith                                /* 49*/ 0,
1882a30f8f8cSSatish Balay                                        0,
1883a30f8f8cSSatish Balay                                        0,
1884a30f8f8cSSatish Balay                                        0,
1885a30f8f8cSSatish Balay                                        0,
1886d519adbfSMatthew Knepley                                /* 54*/ 0,
1887a30f8f8cSSatish Balay                                        0,
1888a30f8f8cSSatish Balay                                        MatSetUnfactored_MPISBAIJ,
1889a30f8f8cSSatish Balay                                        0,
1890a30f8f8cSSatish Balay                                        MatSetValuesBlocked_MPISBAIJ,
18917dae84e0SHong Zhang                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1892a30f8f8cSSatish Balay                                        0,
1893a30f8f8cSSatish Balay                                        0,
1894357abbc8SBarry Smith                                        0,
189524d5174aSHong Zhang                                        0,
1896d519adbfSMatthew Knepley                                /* 64*/ 0,
189724d5174aSHong Zhang                                        0,
189824d5174aSHong Zhang                                        0,
189924d5174aSHong Zhang                                        0,
190024d5174aSHong Zhang                                        0,
1901d519adbfSMatthew Knepley                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
190224d5174aSHong Zhang                                        0,
190328d58a37SPierre Jolivet                                        MatConvert_MPISBAIJ_Basic,
190497304618SKris Buschelman                                        0,
190597304618SKris Buschelman                                        0,
1906d519adbfSMatthew Knepley                                /* 74*/ 0,
190797304618SKris Buschelman                                        0,
190897304618SKris Buschelman                                        0,
190997304618SKris Buschelman                                        0,
191097304618SKris Buschelman                                        0,
1911d519adbfSMatthew Knepley                                /* 79*/ 0,
191297304618SKris Buschelman                                        0,
191397304618SKris Buschelman                                        0,
191497304618SKris Buschelman                                        0,
19155bba2384SShri Abhyankar                                        MatLoad_MPISBAIJ,
1916d519adbfSMatthew Knepley                                /* 84*/ 0,
1917865e5f61SKris Buschelman                                        0,
1918865e5f61SKris Buschelman                                        0,
1919865e5f61SKris Buschelman                                        0,
1920865e5f61SKris Buschelman                                        0,
1921d519adbfSMatthew Knepley                                /* 89*/ 0,
1922865e5f61SKris Buschelman                                        0,
1923865e5f61SKris Buschelman                                        0,
1924865e5f61SKris Buschelman                                        0,
1925865e5f61SKris Buschelman                                        0,
1926d519adbfSMatthew Knepley                                /* 94*/ 0,
1927865e5f61SKris Buschelman                                        0,
1928865e5f61SKris Buschelman                                        0,
192999cafbc1SBarry Smith                                        0,
193099cafbc1SBarry Smith                                        0,
1931d519adbfSMatthew Knepley                                /* 99*/ 0,
193299cafbc1SBarry Smith                                        0,
193399cafbc1SBarry Smith                                        0,
193499cafbc1SBarry Smith                                        0,
193599cafbc1SBarry Smith                                        0,
1936d519adbfSMatthew Knepley                                /*104*/ 0,
193799cafbc1SBarry Smith                                        MatRealPart_MPISBAIJ,
1938d0d4cfc2SHong Zhang                                        MatImaginaryPart_MPISBAIJ,
1939d0d4cfc2SHong Zhang                                        MatGetRowUpperTriangular_MPISBAIJ,
194095936485SShri Abhyankar                                        MatRestoreRowUpperTriangular_MPISBAIJ,
194195936485SShri Abhyankar                                /*109*/ 0,
194295936485SShri Abhyankar                                        0,
194395936485SShri Abhyankar                                        0,
194495936485SShri Abhyankar                                        0,
19453b49f96aSBarry Smith                                        MatMissingDiagonal_MPISBAIJ,
194695936485SShri Abhyankar                                /*114*/ 0,
194795936485SShri Abhyankar                                        0,
194895936485SShri Abhyankar                                        0,
194995936485SShri Abhyankar                                        0,
195095936485SShri Abhyankar                                        0,
195195936485SShri Abhyankar                                /*119*/ 0,
195295936485SShri Abhyankar                                        0,
195395936485SShri Abhyankar                                        0,
19543964eb88SJed Brown                                        0,
19553964eb88SJed Brown                                        0,
19563964eb88SJed Brown                                /*124*/ 0,
19573964eb88SJed Brown                                        0,
19583964eb88SJed Brown                                        0,
19593964eb88SJed Brown                                        0,
19603964eb88SJed Brown                                        0,
19613964eb88SJed Brown                                /*129*/ 0,
19623964eb88SJed Brown                                        0,
19633964eb88SJed Brown                                        0,
19643964eb88SJed Brown                                        0,
19653964eb88SJed Brown                                        0,
19663964eb88SJed Brown                                /*134*/ 0,
19673964eb88SJed Brown                                        0,
19683964eb88SJed Brown                                        0,
19693964eb88SJed Brown                                        0,
19703964eb88SJed Brown                                        0,
197146533700Sstefano_zampini                                /*139*/ MatSetBlockSizes_Default,
1972f9426fe0SMark Adams                                        0,
197359f5e6ceSHong Zhang                                        0,
197459f5e6ceSHong Zhang                                        0,
197559f5e6ceSHong Zhang                                        0,
197659f5e6ceSHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
197799cafbc1SBarry Smith };
1978a30f8f8cSSatish Balay 
1979b2573a8aSBarry Smith PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1980a23d5eceSKris Buschelman {
1981476417e5SBarry Smith   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1982dfbe8321SBarry Smith   PetscErrorCode ierr;
1983535b19f3SBarry Smith   PetscInt       i,mbs,Mbs;
19845d2a9ed1SStefano Zampini   PetscMPIInt    size;
1985a23d5eceSKris Buschelman 
1986a23d5eceSKris Buschelman   PetscFunctionBegin;
198733d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
198826283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
198926283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1990e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1991476417e5SBarry Smith   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1992476417e5SBarry Smith   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1993899cda47SBarry Smith 
1994d0f46423SBarry Smith   mbs = B->rmap->n/bs;
1995d0f46423SBarry Smith   Mbs = B->rmap->N/bs;
1996c2fc9fa9SBarry Smith   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1997a23d5eceSKris Buschelman 
1998d0f46423SBarry Smith   B->rmap->bs = bs;
1999a23d5eceSKris Buschelman   b->bs2      = bs*bs;
2000a23d5eceSKris Buschelman   b->mbs      = mbs;
2001a23d5eceSKris Buschelman   b->Mbs      = Mbs;
2002de64b629SHong Zhang   b->nbs      = B->cmap->n/bs;
2003de64b629SHong Zhang   b->Nbs      = B->cmap->N/bs;
2004a23d5eceSKris Buschelman 
2005a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2006d0f46423SBarry Smith     b->rangebs[i] = B->rmap->range[i]/bs;
2007a23d5eceSKris Buschelman   }
2008d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart/bs;
2009d0f46423SBarry Smith   b->rendbs   = B->rmap->rend/bs;
2010a23d5eceSKris Buschelman 
2011d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart/bs;
2012d0f46423SBarry Smith   b->cendbs   = B->cmap->rend/bs;
2013a23d5eceSKris Buschelman 
2014cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
2015cb7b82ddSBarry Smith   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2016cb7b82ddSBarry Smith #else
2017cb7b82ddSBarry Smith   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2018cb7b82ddSBarry Smith #endif
2019cb7b82ddSBarry Smith   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2020cb7b82ddSBarry Smith   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2021cb7b82ddSBarry Smith   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2022cb7b82ddSBarry Smith   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2023cb7b82ddSBarry Smith   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2024cb7b82ddSBarry Smith   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2025cb7b82ddSBarry Smith   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2026cb7b82ddSBarry Smith   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2027cb7b82ddSBarry Smith   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2028cb7b82ddSBarry Smith 
2029cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
20305d2a9ed1SStefano Zampini   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2031cb7b82ddSBarry Smith   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2032cb7b82ddSBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
20335d2a9ed1SStefano Zampini   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2034cb7b82ddSBarry Smith   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2035cb7b82ddSBarry Smith   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2036cb7b82ddSBarry Smith 
2037526dfc15SBarry Smith   if (!B->preallocated) {
2038f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2039d0f46423SBarry Smith     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
20409c097c71SKris Buschelman     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
20413bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2042ce94432eSBarry Smith     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2043526dfc15SBarry Smith   }
2044a23d5eceSKris Buschelman 
2045526dfc15SBarry Smith   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2046526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
204726fbe8dcSKarl Rupp 
2048526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2049cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
2050cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
2051a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2052a23d5eceSKris Buschelman }
2053a23d5eceSKris Buschelman 
2054dfb205c3SBarry Smith PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2055dfb205c3SBarry Smith {
205602106b30SBarry Smith   PetscInt       m,rstart,cend;
20570cd7f59aSBarry Smith   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2058dfb205c3SBarry Smith   const PetscInt *JJ    =0;
2059dfb205c3SBarry Smith   PetscScalar    *values=0;
2060bb80cfbbSStefano Zampini   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2061dfb205c3SBarry Smith   PetscErrorCode ierr;
20623bd0feecSPierre Jolivet   PetscBool      nooffprocentries;
2063dfb205c3SBarry Smith 
2064dfb205c3SBarry Smith   PetscFunctionBegin;
2065ce94432eSBarry Smith   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2066dfb205c3SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2067dfb205c3SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2068dfb205c3SBarry Smith   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2069dfb205c3SBarry Smith   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2070e02043d6SBarry Smith   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2071dfb205c3SBarry Smith   m      = B->rmap->n/bs;
2072dfb205c3SBarry Smith   rstart = B->rmap->rstart/bs;
2073dfb205c3SBarry Smith   cend   = B->cmap->rend/bs;
2074dfb205c3SBarry Smith 
2075dfb205c3SBarry Smith   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2076dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2077dfb205c3SBarry Smith   for (i=0; i<m; i++) {
2078dfb205c3SBarry Smith     nz = ii[i+1] - ii[i];
2079dfb205c3SBarry Smith     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
20800cd7f59aSBarry Smith     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2081dfb205c3SBarry Smith     JJ     = jj + ii[i];
20820cd7f59aSBarry Smith     bd     = 0;
2083dfb205c3SBarry Smith     for (j=0; j<nz; j++) {
20840cd7f59aSBarry Smith       if (*JJ >= i + rstart) break;
2085dfb205c3SBarry Smith       JJ++;
20860cd7f59aSBarry Smith       bd++;
2087dfb205c3SBarry Smith     }
2088dfb205c3SBarry Smith     d  = 0;
2089dfb205c3SBarry Smith     for (; j<nz; j++) {
2090dfb205c3SBarry Smith       if (*JJ++ >= cend) break;
2091dfb205c3SBarry Smith       d++;
2092dfb205c3SBarry Smith     }
2093dfb205c3SBarry Smith     d_nnz[i] = d;
20940cd7f59aSBarry Smith     o_nnz[i] = nz - d - bd;
20950cd7f59aSBarry Smith     nz       = nz - bd;
20960cd7f59aSBarry Smith     nz_max = PetscMax(nz_max,nz);
2097dfb205c3SBarry Smith   }
2098dfb205c3SBarry Smith   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2099bb80cfbbSStefano Zampini   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2100dfb205c3SBarry Smith   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2101dfb205c3SBarry Smith 
2102dfb205c3SBarry Smith   values = (PetscScalar*)V;
2103dfb205c3SBarry Smith   if (!values) {
2104580bdb30SBarry Smith     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2105dfb205c3SBarry Smith   }
2106dfb205c3SBarry Smith   for (i=0; i<m; i++) {
2107dfb205c3SBarry Smith     PetscInt          row    = i + rstart;
2108dfb205c3SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
2109dfb205c3SBarry Smith     const PetscInt    *icols = jj + ii[i];
2110bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2111dfb205c3SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2112dfb205c3SBarry Smith       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2113bb80cfbbSStefano Zampini     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2114bb80cfbbSStefano Zampini       PetscInt j;
21150cd7f59aSBarry Smith       for (j=0; j<ncols; j++) {
21160cd7f59aSBarry Smith         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
21170cd7f59aSBarry Smith         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
21180cd7f59aSBarry Smith       }
21190cd7f59aSBarry Smith     }
2120dfb205c3SBarry Smith   }
2121dfb205c3SBarry Smith 
2122dfb205c3SBarry Smith   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
21233bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
21243bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
2125dfb205c3SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2126dfb205c3SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21273bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
21283bd0feecSPierre Jolivet 
21297827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2130dfb205c3SBarry Smith   PetscFunctionReturn(0);
2131dfb205c3SBarry Smith }
2132dfb205c3SBarry Smith 
21330bad9183SKris Buschelman /*MC
2134fafad747SKris Buschelman    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2135828413b8SBarry Smith    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2136828413b8SBarry Smith    the matrix is stored.
2137828413b8SBarry Smith 
2138828413b8SBarry Smith    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2139828413b8SBarry Smith    can call MatSetOption(Mat, MAT_HERMITIAN);
21400bad9183SKris Buschelman 
21410bad9183SKris Buschelman    Options Database Keys:
21420bad9183SKris Buschelman . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
21430bad9183SKris Buschelman 
2144476417e5SBarry Smith    Notes:
2145476417e5SBarry Smith      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2146476417e5SBarry Smith      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2147476417e5SBarry Smith 
21480bad9183SKris Buschelman    Level: beginner
21490bad9183SKris Buschelman 
2150fd292e60Sprj- .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
21510bad9183SKris Buschelman M*/
21520bad9183SKris Buschelman 
21538cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2154b5df2d14SHong Zhang {
2155b5df2d14SHong Zhang   Mat_MPISBAIJ   *b;
2156dfbe8321SBarry Smith   PetscErrorCode ierr;
215794ae4db5SBarry Smith   PetscBool      flg = PETSC_FALSE;
2158b5df2d14SHong Zhang 
2159b5df2d14SHong Zhang   PetscFunctionBegin;
2160b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2161b0a32e0cSBarry Smith   B->data = (void*)b;
2162b5df2d14SHong Zhang   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2163b5df2d14SHong Zhang 
2164b5df2d14SHong Zhang   B->ops->destroy = MatDestroy_MPISBAIJ;
2165b5df2d14SHong Zhang   B->ops->view    = MatView_MPISBAIJ;
2166b5df2d14SHong Zhang   B->assembled    = PETSC_FALSE;
2167b5df2d14SHong Zhang   B->insertmode   = NOT_SET_VALUES;
216826fbe8dcSKarl Rupp 
2169ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2170ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2171b5df2d14SHong Zhang 
2172b5df2d14SHong Zhang   /* build local table of row and column ownerships */
2173854ce69bSBarry Smith   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2174b5df2d14SHong Zhang 
2175b5df2d14SHong Zhang   /* build cache for off array entries formed */
2176ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
217726fbe8dcSKarl Rupp 
2178b5df2d14SHong Zhang   b->donotstash  = PETSC_FALSE;
21790298fd71SBarry Smith   b->colmap      = NULL;
21800298fd71SBarry Smith   b->garray      = NULL;
2181b5df2d14SHong Zhang   b->roworiented = PETSC_TRUE;
2182b5df2d14SHong Zhang 
2183b5df2d14SHong Zhang   /* stuff used in block assembly */
2184b5df2d14SHong Zhang   b->barray = 0;
2185b5df2d14SHong Zhang 
2186b5df2d14SHong Zhang   /* stuff used for matrix vector multiply */
2187b5df2d14SHong Zhang   b->lvec    = 0;
2188b5df2d14SHong Zhang   b->Mvctx   = 0;
218940781036SHong Zhang   b->slvec0  = 0;
219040781036SHong Zhang   b->slvec0b = 0;
219140781036SHong Zhang   b->slvec1  = 0;
219240781036SHong Zhang   b->slvec1a = 0;
219340781036SHong Zhang   b->slvec1b = 0;
219440781036SHong Zhang   b->sMvctx  = 0;
2195b5df2d14SHong Zhang 
2196b5df2d14SHong Zhang   /* stuff for MatGetRow() */
2197b5df2d14SHong Zhang   b->rowindices   = 0;
2198b5df2d14SHong Zhang   b->rowvalues    = 0;
2199b5df2d14SHong Zhang   b->getrowactive = PETSC_FALSE;
2200b5df2d14SHong Zhang 
2201b5df2d14SHong Zhang   /* hash table stuff */
2202b5df2d14SHong Zhang   b->ht           = 0;
2203b5df2d14SHong Zhang   b->hd           = 0;
2204b5df2d14SHong Zhang   b->ht_size      = 0;
2205b5df2d14SHong Zhang   b->ht_flag      = PETSC_FALSE;
2206b5df2d14SHong Zhang   b->ht_fact      = 0;
2207b5df2d14SHong Zhang   b->ht_total_ct  = 0;
2208b5df2d14SHong Zhang   b->ht_insert_ct = 0;
2209b5df2d14SHong Zhang 
22107dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
22117a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
22127a868f3eSHong Zhang 
221359ffdab8SBarry Smith   b->in_loc = 0;
221459ffdab8SBarry Smith   b->v_loc  = 0;
221559ffdab8SBarry Smith   b->n_loc  = 0;
221694ae4db5SBarry Smith 
2217bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2218bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2219bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2220bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
22216214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
22226214f412SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
22236214f412SHong Zhang #endif
222428d58a37SPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
222528d58a37SPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2226aa5a9175SDahai Guo 
222723ce1328SBarry Smith   B->symmetric                  = PETSC_TRUE;
222823ce1328SBarry Smith   B->structurally_symmetric     = PETSC_TRUE;
222923ce1328SBarry Smith   B->symmetric_set              = PETSC_TRUE;
223023ce1328SBarry Smith   B->structurally_symmetric_set = PETSC_TRUE;
22319899f194SHong Zhang   B->symmetric_eternal          = PETSC_TRUE;
2232eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
223313647f61SHong Zhang   B->hermitian                  = PETSC_FALSE;
223413647f61SHong Zhang   B->hermitian_set              = PETSC_FALSE;
2235eb1ec7c1SStefano Zampini #else
2236eb1ec7c1SStefano Zampini   B->hermitian                  = PETSC_TRUE;
2237eb1ec7c1SStefano Zampini   B->hermitian_set              = PETSC_TRUE;
2238eb1ec7c1SStefano Zampini #endif
223913647f61SHong Zhang 
224017667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
224194ae4db5SBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
224294ae4db5SBarry Smith   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
224394ae4db5SBarry Smith   if (flg) {
224494ae4db5SBarry Smith     PetscReal fact = 1.39;
224594ae4db5SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
224694ae4db5SBarry Smith     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
224794ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
224894ae4db5SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
224994ae4db5SBarry Smith     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
225094ae4db5SBarry Smith   }
225194ae4db5SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2252b5df2d14SHong Zhang   PetscFunctionReturn(0);
2253b5df2d14SHong Zhang }
2254b5df2d14SHong Zhang 
2255209238afSKris Buschelman /*MC
2256002d173eSKris Buschelman    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2257209238afSKris Buschelman 
2258209238afSKris Buschelman    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2259209238afSKris Buschelman    and MATMPISBAIJ otherwise.
2260209238afSKris Buschelman 
2261209238afSKris Buschelman    Options Database Keys:
2262209238afSKris Buschelman . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2263209238afSKris Buschelman 
2264209238afSKris Buschelman   Level: beginner
2265209238afSKris Buschelman 
2266209238afSKris Buschelman .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2267209238afSKris Buschelman M*/
2268209238afSKris Buschelman 
2269b5df2d14SHong Zhang /*@C
2270b5df2d14SHong Zhang    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2271b5df2d14SHong Zhang    the user should preallocate the matrix storage by setting the parameters
2272b5df2d14SHong Zhang    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2273b5df2d14SHong Zhang    performance can be increased by more than a factor of 50.
2274b5df2d14SHong Zhang 
2275b5df2d14SHong Zhang    Collective on Mat
2276b5df2d14SHong Zhang 
2277b5df2d14SHong Zhang    Input Parameters:
22781c4f3114SJed Brown +  B - the matrix
2279bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2280bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2281b5df2d14SHong Zhang .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2282b5df2d14SHong Zhang            submatrix  (same for all local rows)
2283b5df2d14SHong Zhang .  d_nnz - array containing the number of block nonzeros in the various block rows
22846d10fdaeSSatish Balay            in the upper triangular and diagonal part of the in diagonal portion of the local
22850298fd71SBarry Smith            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
228695742e49SBarry Smith            for the diagonal entry and set a value even if it is zero.
2287b5df2d14SHong Zhang .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2288b5df2d14SHong Zhang            submatrix (same for all local rows).
2289b5df2d14SHong Zhang -  o_nnz - array containing the number of nonzeros in the various block rows of the
2290c2fc9fa9SBarry Smith            off-diagonal portion of the local submatrix that is right of the diagonal
22910298fd71SBarry Smith            (possibly different for each block row) or NULL.
2292b5df2d14SHong Zhang 
2293b5df2d14SHong Zhang 
2294b5df2d14SHong Zhang    Options Database Keys:
2295a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2296b5df2d14SHong Zhang                      block calculations (much slower)
2297a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
2298b5df2d14SHong Zhang 
2299b5df2d14SHong Zhang    Notes:
2300b5df2d14SHong Zhang 
2301b5df2d14SHong Zhang    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2302b5df2d14SHong Zhang    than it must be used on all processors that share the object for that argument.
2303b5df2d14SHong Zhang 
230449a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
230549a6f317SBarry Smith 
2306b5df2d14SHong Zhang    Storage Information:
2307b5df2d14SHong Zhang    For a square global matrix we define each processor's diagonal portion
2308b5df2d14SHong Zhang    to be its local rows and the corresponding columns (a square submatrix);
2309b5df2d14SHong Zhang    each processor's off-diagonal portion encompasses the remainder of the
2310b5df2d14SHong Zhang    local matrix (a rectangular submatrix).
2311b5df2d14SHong Zhang 
2312b5df2d14SHong Zhang    The user can specify preallocated storage for the diagonal part of
2313b5df2d14SHong Zhang    the local submatrix with either d_nz or d_nnz (not both).  Set
23140298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2315b5df2d14SHong Zhang    memory allocation.  Likewise, specify preallocated storage for the
2316b5df2d14SHong Zhang    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2317b5df2d14SHong Zhang 
2318aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
2319aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2320aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
2321aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
2322aa95bbe8SBarry Smith 
2323b5df2d14SHong Zhang    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2324b5df2d14SHong Zhang    the figure below we depict these three local rows and all columns (0-11).
2325b5df2d14SHong Zhang 
2326b5df2d14SHong Zhang .vb
2327b5df2d14SHong Zhang            0 1 2 3 4 5 6 7 8 9 10 11
2328a4b1a0f6SJed Brown           --------------------------
2329c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2330c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2331c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2332a4b1a0f6SJed Brown           --------------------------
2333b5df2d14SHong Zhang .ve
2334b5df2d14SHong Zhang 
2335b5df2d14SHong Zhang    Thus, any entries in the d locations are stored in the d (diagonal)
2336b5df2d14SHong Zhang    submatrix, and any entries in the o locations are stored in the
23376d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
23386d10fdaeSSatish Balay    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2339b5df2d14SHong Zhang 
23406d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
23416d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2342c2fc9fa9SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix
2343c2fc9fa9SBarry Smith 
2344b5df2d14SHong Zhang    In general, for PDE problems in which most nonzeros are near the diagonal,
2345b5df2d14SHong Zhang    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2346b5df2d14SHong Zhang    or you will get TERRIBLE performance; see the users' manual chapter on
2347b5df2d14SHong Zhang    matrices.
2348b5df2d14SHong Zhang 
2349b5df2d14SHong Zhang    Level: intermediate
2350b5df2d14SHong Zhang 
2351ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2352b5df2d14SHong Zhang @*/
23537087cfbeSBarry Smith PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2354b5df2d14SHong Zhang {
23554ac538c5SBarry Smith   PetscErrorCode ierr;
2356b5df2d14SHong Zhang 
2357b5df2d14SHong Zhang   PetscFunctionBegin;
23586ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
23596ba663aaSJed Brown   PetscValidType(B,1);
23606ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
23614ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2362b5df2d14SHong Zhang   PetscFunctionReturn(0);
2363b5df2d14SHong Zhang }
2364b5df2d14SHong Zhang 
2365a30f8f8cSSatish Balay /*@C
236669b1f4b7SBarry Smith    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2367a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2368a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2369a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2370a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2371a30f8f8cSSatish Balay 
2372d083f849SBarry Smith    Collective
2373a30f8f8cSSatish Balay 
2374a30f8f8cSSatish Balay    Input Parameters:
2375a30f8f8cSSatish Balay +  comm - MPI communicator
2376bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2377bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2378a30f8f8cSSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2379a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2380a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
2381a30f8f8cSSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2382a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2383a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
2384a30f8f8cSSatish Balay .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2385a30f8f8cSSatish Balay .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2386a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2387a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2388a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
23896d10fdaeSSatish Balay            in the upper triangular portion of the in diagonal portion of the local
23900298fd71SBarry Smith            (possibly different for each block block row) or NULL.
239195742e49SBarry Smith            If you plan to factor the matrix you must leave room for the diagonal entry and
239295742e49SBarry Smith            set its value even if it is zero.
2393a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2394a30f8f8cSSatish Balay            submatrix (same for all local rows).
2395a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2396a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
23970298fd71SBarry Smith            each block row) or NULL.
2398a30f8f8cSSatish Balay 
2399a30f8f8cSSatish Balay    Output Parameter:
2400a30f8f8cSSatish Balay .  A - the matrix
2401a30f8f8cSSatish Balay 
2402a30f8f8cSSatish Balay    Options Database Keys:
2403a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2404a30f8f8cSSatish Balay                      block calculations (much slower)
2405a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2406a2b725a8SWilliam Gropp -   -mat_mpi - use the parallel matrix data structures even on one processor
2407a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2408a30f8f8cSSatish Balay 
2409175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2410f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2411175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2412175b88e8SBarry Smith 
2413a30f8f8cSSatish Balay    Notes:
2414d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
24156d6d819aSHong Zhang    This matrix type does not support complex Hermitian operation.
2416d1be2dadSMatthew Knepley 
2417a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2418a30f8f8cSSatish Balay    (possibly both).
2419a30f8f8cSSatish Balay 
2420a30f8f8cSSatish Balay    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2421a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2422a30f8f8cSSatish Balay 
242349a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
242449a6f317SBarry Smith 
2425a30f8f8cSSatish Balay    Storage Information:
2426a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2427a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2428a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2429a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2430a30f8f8cSSatish Balay 
2431a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2432a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
24330298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2434a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2435a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2436a30f8f8cSSatish Balay 
2437a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2438a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2439a30f8f8cSSatish Balay 
2440a30f8f8cSSatish Balay .vb
2441a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2442a4b1a0f6SJed Brown           --------------------------
2443c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2444c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2445c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2446a4b1a0f6SJed Brown           --------------------------
2447a30f8f8cSSatish Balay .ve
2448a30f8f8cSSatish Balay 
2449a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2450a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
24516d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
24526d10fdaeSSatish Balay    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2453a30f8f8cSSatish Balay 
24546d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
24556d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2456a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2457a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2458a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2459a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2460a30f8f8cSSatish Balay    matrices.
2461a30f8f8cSSatish Balay 
2462a30f8f8cSSatish Balay    Level: intermediate
2463a30f8f8cSSatish Balay 
246469b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2465a30f8f8cSSatish Balay @*/
2466a30f8f8cSSatish Balay 
246769b1f4b7SBarry Smith PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2468a30f8f8cSSatish Balay {
24696849ba73SBarry Smith   PetscErrorCode ierr;
24701302d50aSBarry Smith   PetscMPIInt    size;
2471a30f8f8cSSatish Balay 
2472a30f8f8cSSatish Balay   PetscFunctionBegin;
2473f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2474f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2475273d9f13SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2476273d9f13SBarry Smith   if (size > 1) {
2477b5df2d14SHong Zhang     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2478b5df2d14SHong Zhang     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2479273d9f13SBarry Smith   } else {
2480273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2481273d9f13SBarry Smith     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2482273d9f13SBarry Smith   }
2483a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2484a30f8f8cSSatish Balay }
2485a30f8f8cSSatish Balay 
2486a30f8f8cSSatish Balay 
24876849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2488a30f8f8cSSatish Balay {
2489a30f8f8cSSatish Balay   Mat            mat;
2490a30f8f8cSSatish Balay   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2491dfbe8321SBarry Smith   PetscErrorCode ierr;
2492d0f46423SBarry Smith   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2493387bc808SHong Zhang   PetscScalar    *array;
2494a30f8f8cSSatish Balay 
2495a30f8f8cSSatish Balay   PetscFunctionBegin;
2496a30f8f8cSSatish Balay   *newmat = 0;
249726fbe8dcSKarl Rupp 
2498ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2499d0f46423SBarry Smith   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
25007adad957SLisandro Dalcin   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
25011e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
25021e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2503e1b6402fSHong Zhang 
2504d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2505273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
250682327fa8SHong Zhang   mat->assembled    = PETSC_TRUE;
25077fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
25087fff6886SHong Zhang 
2509b5df2d14SHong Zhang   a      = (Mat_MPISBAIJ*)mat->data;
2510a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2511a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2512a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2513a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2514a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2515a30f8f8cSSatish Balay 
2516a30f8f8cSSatish Balay   a->size         = oldmat->size;
2517a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2518a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2519a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2520a30f8f8cSSatish Balay   a->rowindices   = 0;
2521a30f8f8cSSatish Balay   a->rowvalues    = 0;
2522a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2523a30f8f8cSSatish Balay   a->barray       = 0;
2524899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2525899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2526899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2527899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
2528a30f8f8cSSatish Balay 
2529a30f8f8cSSatish Balay   /* hash table stuff */
2530a30f8f8cSSatish Balay   a->ht           = 0;
2531a30f8f8cSSatish Balay   a->hd           = 0;
2532a30f8f8cSSatish Balay   a->ht_size      = 0;
2533a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2534a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2535a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2536a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2537a30f8f8cSSatish Balay 
2538580bdb30SBarry Smith   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2539a30f8f8cSSatish Balay   if (oldmat->colmap) {
2540a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2541a30f8f8cSSatish Balay     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2542a30f8f8cSSatish Balay #else
2543854ce69bSBarry Smith     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
25443bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2545580bdb30SBarry Smith     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2546a30f8f8cSSatish Balay #endif
2547a30f8f8cSSatish Balay   } else a->colmap = 0;
2548387bc808SHong Zhang 
2549a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2550785e854fSJed Brown     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
25513bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2552580bdb30SBarry Smith     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2553a30f8f8cSSatish Balay   } else a->garray = 0;
2554a30f8f8cSSatish Balay 
2555ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2556a30f8f8cSSatish Balay   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
25573bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2558a30f8f8cSSatish Balay   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
25593bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
256082327fa8SHong Zhang 
256182327fa8SHong Zhang   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
25623bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
256382327fa8SHong Zhang   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
25643bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2565387bc808SHong Zhang 
2566387bc808SHong Zhang   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
25671ebc52fbSHong Zhang   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2568778a2246SBarry Smith   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2569778a2246SBarry Smith   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
25701ebc52fbSHong Zhang   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
25711ebc52fbSHong Zhang   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2572778a2246SBarry Smith   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
25731ebc52fbSHong Zhang   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
25743bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
25753bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
25763bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
25773bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
25783bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2579387bc808SHong Zhang 
2580387bc808SHong Zhang   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2581387bc808SHong Zhang   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2582387bc808SHong Zhang   a->sMvctx = oldmat->sMvctx;
25833bb1ff40SBarry Smith   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
258482327fa8SHong Zhang 
2585a30f8f8cSSatish Balay   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
25863bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2587a30f8f8cSSatish Balay   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
25883bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2589140e18c1SBarry Smith   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2590a30f8f8cSSatish Balay   *newmat = mat;
2591a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2592a30f8f8cSSatish Balay }
2593a30f8f8cSSatish Balay 
2594*618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
2595*618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2596*618cc2edSLisandro Dalcin 
2597*618cc2edSLisandro Dalcin PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
259895936485SShri Abhyankar {
259995936485SShri Abhyankar   PetscErrorCode ierr;
26007f489da9SVaclav Hapla   PetscBool      isbinary;
260195936485SShri Abhyankar 
260295936485SShri Abhyankar   PetscFunctionBegin;
26037f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2604*618cc2edSLisandro Dalcin   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2605*618cc2edSLisandro Dalcin   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
260695936485SShri Abhyankar   PetscFunctionReturn(0);
260795936485SShri Abhyankar }
260895936485SShri Abhyankar 
2609dcf5cc72SBarry Smith /*XXXXX@
2610a30f8f8cSSatish Balay    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2611a30f8f8cSSatish Balay 
2612a30f8f8cSSatish Balay    Input Parameters:
2613a30f8f8cSSatish Balay .  mat  - the matrix
2614a30f8f8cSSatish Balay .  fact - factor
2615a30f8f8cSSatish Balay 
2616c5eb9154SBarry Smith    Not Collective on Mat, each process can have a different hash factor
2617a30f8f8cSSatish Balay 
2618a30f8f8cSSatish Balay    Level: advanced
2619a30f8f8cSSatish Balay 
2620a30f8f8cSSatish Balay   Notes:
2621a30f8f8cSSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2622a30f8f8cSSatish Balay 
2623a30f8f8cSSatish Balay .seealso: MatSetOption()
2624dcf5cc72SBarry Smith @XXXXX*/
2625dcf5cc72SBarry Smith 
262624d5174aSHong Zhang 
2627985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
262824d5174aSHong Zhang {
262924d5174aSHong Zhang   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2630f4c0e9e4SHong Zhang   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2631ca54ac64SHong Zhang   PetscReal      atmp;
263287828ca2SBarry Smith   PetscReal      *work,*svalues,*rvalues;
2633dfbe8321SBarry Smith   PetscErrorCode ierr;
26341302d50aSBarry Smith   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
26351302d50aSBarry Smith   PetscMPIInt    rank,size;
26361302d50aSBarry Smith   PetscInt       *rowners_bs,dest,count,source;
263787828ca2SBarry Smith   PetscScalar    *va;
26388a1c53f2SBarry Smith   MatScalar      *ba;
2639f4c0e9e4SHong Zhang   MPI_Status     stat;
264024d5174aSHong Zhang 
264124d5174aSHong Zhang   PetscFunctionBegin;
2642e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
26430298fd71SBarry Smith   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
26441ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2645f4c0e9e4SHong Zhang 
2646ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2647ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2648f4c0e9e4SHong Zhang 
2649d0f46423SBarry Smith   bs  = A->rmap->bs;
2650f4c0e9e4SHong Zhang   mbs = a->mbs;
2651f4c0e9e4SHong Zhang   Mbs = a->Mbs;
2652f4c0e9e4SHong Zhang   ba  = b->a;
2653f4c0e9e4SHong Zhang   bi  = b->i;
2654f4c0e9e4SHong Zhang   bj  = b->j;
2655f4c0e9e4SHong Zhang 
2656f4c0e9e4SHong Zhang   /* find ownerships */
2657d0f46423SBarry Smith   rowners_bs = A->rmap->range;
2658f4c0e9e4SHong Zhang 
2659f4c0e9e4SHong Zhang   /* each proc creates an array to be distributed */
2660580bdb30SBarry Smith   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2661f4c0e9e4SHong Zhang 
2662f4c0e9e4SHong Zhang   /* row_max for B */
2663b8475685SHong Zhang   if (rank != size-1) {
2664f4c0e9e4SHong Zhang     for (i=0; i<mbs; i++) {
2665f4c0e9e4SHong Zhang       ncols = bi[1] - bi[0]; bi++;
2666f4c0e9e4SHong Zhang       brow  = bs*i;
2667f4c0e9e4SHong Zhang       for (j=0; j<ncols; j++) {
2668f4c0e9e4SHong Zhang         bcol = bs*(*bj);
2669f4c0e9e4SHong Zhang         for (kcol=0; kcol<bs; kcol++) {
2670ca54ac64SHong Zhang           col  = bcol + kcol;                /* local col index */
267104d41228SHong Zhang           col += rowners_bs[rank+1];      /* global col index */
2672f4c0e9e4SHong Zhang           for (krow=0; krow<bs; krow++) {
2673f4c0e9e4SHong Zhang             atmp = PetscAbsScalar(*ba); ba++;
2674ca54ac64SHong Zhang             row  = brow + krow;   /* local row index */
2675ca54ac64SHong Zhang             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2676f4c0e9e4SHong Zhang             if (work[col] < atmp) work[col] = atmp;
2677f4c0e9e4SHong Zhang           }
2678f4c0e9e4SHong Zhang         }
2679f4c0e9e4SHong Zhang         bj++;
2680f4c0e9e4SHong Zhang       }
2681f4c0e9e4SHong Zhang     }
2682f4c0e9e4SHong Zhang 
2683f4c0e9e4SHong Zhang     /* send values to its owners */
2684f4c0e9e4SHong Zhang     for (dest=rank+1; dest<size; dest++) {
2685f4c0e9e4SHong Zhang       svalues = work + rowners_bs[dest];
2686ca54ac64SHong Zhang       count   = rowners_bs[dest+1]-rowners_bs[dest];
2687ce94432eSBarry Smith       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2688ca54ac64SHong Zhang     }
2689f4c0e9e4SHong Zhang   }
2690f4c0e9e4SHong Zhang 
2691f4c0e9e4SHong Zhang   /* receive values */
2692ca54ac64SHong Zhang   if (rank) {
2693f4c0e9e4SHong Zhang     rvalues = work;
2694ca54ac64SHong Zhang     count   = rowners_bs[rank+1]-rowners_bs[rank];
2695f4c0e9e4SHong Zhang     for (source=0; source<rank; source++) {
2696ce94432eSBarry Smith       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2697f4c0e9e4SHong Zhang       /* process values */
2698f4c0e9e4SHong Zhang       for (i=0; i<count; i++) {
2699ca54ac64SHong Zhang         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2700f4c0e9e4SHong Zhang       }
2701f4c0e9e4SHong Zhang     }
2702ca54ac64SHong Zhang   }
2703f4c0e9e4SHong Zhang 
27041ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2705ac355199SBarry Smith   ierr = PetscFree(work);CHKERRQ(ierr);
270624d5174aSHong Zhang   PetscFunctionReturn(0);
270724d5174aSHong Zhang }
27082798e883SHong Zhang 
270941f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
27102798e883SHong Zhang {
27112798e883SHong Zhang   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2712dfbe8321SBarry Smith   PetscErrorCode    ierr;
2713d0f46423SBarry Smith   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
27143649974fSBarry Smith   PetscScalar       *x,*ptr,*from;
2715ffe4fb16SHong Zhang   Vec               bb1;
27163649974fSBarry Smith   const PetscScalar *b;
2717ffe4fb16SHong Zhang 
2718ffe4fb16SHong Zhang   PetscFunctionBegin;
2719e32f2f54SBarry Smith   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2720e32f2f54SBarry Smith   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2721ffe4fb16SHong Zhang 
2722a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
272341f059aeSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2724a2b30743SBarry Smith     PetscFunctionReturn(0);
2725a2b30743SBarry Smith   }
2726a2b30743SBarry Smith 
2727ffe4fb16SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2728ffe4fb16SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
272941f059aeSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2730ffe4fb16SHong Zhang       its--;
2731ffe4fb16SHong Zhang     }
2732ffe4fb16SHong Zhang 
2733ffe4fb16SHong Zhang     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2734ffe4fb16SHong Zhang     while (its--) {
2735ffe4fb16SHong Zhang 
2736ffe4fb16SHong Zhang       /* lower triangular part: slvec0b = - B^T*xx */
2737ffe4fb16SHong Zhang       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2738ffe4fb16SHong Zhang 
2739ffe4fb16SHong Zhang       /* copy xx into slvec0a */
27401ebc52fbSHong Zhang       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
27411ebc52fbSHong Zhang       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2742580bdb30SBarry Smith       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
27431ebc52fbSHong Zhang       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2744ffe4fb16SHong Zhang 
2745efb30889SBarry Smith       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2746ffe4fb16SHong Zhang 
2747ffe4fb16SHong Zhang       /* copy bb into slvec1a */
27481ebc52fbSHong Zhang       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
27493649974fSBarry Smith       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2750580bdb30SBarry Smith       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
27511ebc52fbSHong Zhang       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2752ffe4fb16SHong Zhang 
2753ffe4fb16SHong Zhang       /* set slvec1b = 0 */
2754fa22f6d0SBarry Smith       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2755ffe4fb16SHong Zhang 
2756ca9f406cSSatish Balay       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
27571ebc52fbSHong Zhang       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2758a8b09249SBarry Smith       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2759ca9f406cSSatish Balay       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2760ffe4fb16SHong Zhang 
2761ffe4fb16SHong Zhang       /* upper triangular part: bb1 = bb1 - B*x */
2762ffe4fb16SHong Zhang       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2763ffe4fb16SHong Zhang 
2764ffe4fb16SHong Zhang       /* local diagonal sweep */
276541f059aeSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2766ffe4fb16SHong Zhang     }
27676bf464f9SBarry Smith     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2768fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
276941f059aeSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2770fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
277141f059aeSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2772fa22f6d0SBarry Smith   } else if (flag & SOR_EISENSTAT) {
2773fa22f6d0SBarry Smith     Vec               xx1;
2774ace3abfcSBarry Smith     PetscBool         hasop;
277520f1ed55SBarry Smith     const PetscScalar *diag;
2776887ee2caSBarry Smith     PetscScalar       *sl,scale = (omega - 2.0)/omega;
277720f1ed55SBarry Smith     PetscInt          i,n;
2778fa22f6d0SBarry Smith 
2779fa22f6d0SBarry Smith     if (!mat->xx1) {
2780fa22f6d0SBarry Smith       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2781fa22f6d0SBarry Smith       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2782fa22f6d0SBarry Smith     }
2783fa22f6d0SBarry Smith     xx1 = mat->xx1;
2784fa22f6d0SBarry Smith     bb1 = mat->bb1;
2785fa22f6d0SBarry Smith 
278641f059aeSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2787fa22f6d0SBarry Smith 
2788fa22f6d0SBarry Smith     if (!mat->diag) {
2789effcda25SBarry Smith       /* this is wrong for same matrix with new nonzero values */
27902a7a6963SBarry Smith       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2791fa22f6d0SBarry Smith       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2792fa22f6d0SBarry Smith     }
2793fa22f6d0SBarry Smith     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2794fa22f6d0SBarry Smith 
2795fa22f6d0SBarry Smith     if (hasop) {
2796fa22f6d0SBarry Smith       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2797887ee2caSBarry Smith       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
279820f1ed55SBarry Smith     } else {
279920f1ed55SBarry Smith       /*
280020f1ed55SBarry Smith           These two lines are replaced by code that may be a bit faster for a good compiler
280120f1ed55SBarry Smith       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2802887ee2caSBarry Smith       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
280320f1ed55SBarry Smith       */
280420f1ed55SBarry Smith       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
28053649974fSBarry Smith       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
28063649974fSBarry Smith       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
280720f1ed55SBarry Smith       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
280820f1ed55SBarry Smith       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2809887ee2caSBarry Smith       if (omega == 1.0) {
281026fbe8dcSKarl Rupp         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
281120f1ed55SBarry Smith         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2812887ee2caSBarry Smith       } else {
281326fbe8dcSKarl Rupp         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2814887ee2caSBarry Smith         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2815887ee2caSBarry Smith       }
281620f1ed55SBarry Smith       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
28173649974fSBarry Smith       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
28183649974fSBarry Smith       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
281920f1ed55SBarry Smith       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
282020f1ed55SBarry Smith     }
2821fa22f6d0SBarry Smith 
2822fa22f6d0SBarry Smith     /* multiply off-diagonal portion of matrix */
2823fa22f6d0SBarry Smith     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2824fa22f6d0SBarry Smith     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2825fa22f6d0SBarry Smith     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2826fa22f6d0SBarry Smith     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2827580bdb30SBarry Smith     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
2828fa22f6d0SBarry Smith     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2829fa22f6d0SBarry Smith     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2830fa22f6d0SBarry Smith     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2831fa22f6d0SBarry Smith     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2832effcda25SBarry Smith     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2833fa22f6d0SBarry Smith 
2834fa22f6d0SBarry Smith     /* local sweep */
283541f059aeSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2836fa22f6d0SBarry Smith     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2837f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2838ffe4fb16SHong Zhang   PetscFunctionReturn(0);
2839ffe4fb16SHong Zhang }
2840ffe4fb16SHong Zhang 
284141f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2842ffe4fb16SHong Zhang {
2843ffe4fb16SHong Zhang   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2844dfbe8321SBarry Smith   PetscErrorCode ierr;
28452798e883SHong Zhang   Vec            lvec1,bb1;
28462798e883SHong Zhang 
28472798e883SHong Zhang   PetscFunctionBegin;
2848e32f2f54SBarry Smith   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2849e32f2f54SBarry Smith   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
28502798e883SHong Zhang 
2851c14dc6b6SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
28522798e883SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
285341f059aeSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
28542798e883SHong Zhang       its--;
28552798e883SHong Zhang     }
28562798e883SHong Zhang 
28572798e883SHong Zhang     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
28582798e883SHong Zhang     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
28592798e883SHong Zhang     while (its--) {
2860ca9f406cSSatish Balay       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28612798e883SHong Zhang 
28622798e883SHong Zhang       /* lower diagonal part: bb1 = bb - B^T*xx */
28632798e883SHong Zhang       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2864efb30889SBarry Smith       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
28652798e883SHong Zhang 
2866ca9f406cSSatish Balay       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28672798e883SHong Zhang       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2868ca9f406cSSatish Balay       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28692798e883SHong Zhang 
28702798e883SHong Zhang       /* upper diagonal part: bb1 = bb1 - B*x */
2871efb30889SBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
28722798e883SHong Zhang       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
28732798e883SHong Zhang 
2874ca9f406cSSatish Balay       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28752798e883SHong Zhang 
2876c14dc6b6SHong Zhang       /* diagonal sweep */
287741f059aeSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
28782798e883SHong Zhang     }
28796bf464f9SBarry Smith     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
28806bf464f9SBarry Smith     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
28816bf464f9SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
28822798e883SHong Zhang   PetscFunctionReturn(0);
28832798e883SHong Zhang }
28842798e883SHong Zhang 
2885dfb205c3SBarry Smith /*@
2886dfb205c3SBarry Smith      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2887dfb205c3SBarry Smith          CSR format the local rows.
2888dfb205c3SBarry Smith 
2889d083f849SBarry Smith    Collective
2890dfb205c3SBarry Smith 
2891dfb205c3SBarry Smith    Input Parameters:
2892dfb205c3SBarry Smith +  comm - MPI communicator
2893dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
2894dfb205c3SBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
2895dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
2896dfb205c3SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2897dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
2898dfb205c3SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2899dfb205c3SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2900483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2901dfb205c3SBarry Smith .   j - column indices
2902dfb205c3SBarry Smith -   a - matrix values
2903dfb205c3SBarry Smith 
2904dfb205c3SBarry Smith    Output Parameter:
2905dfb205c3SBarry Smith .   mat - the matrix
2906dfb205c3SBarry Smith 
2907dfb205c3SBarry Smith    Level: intermediate
2908dfb205c3SBarry Smith 
2909dfb205c3SBarry Smith    Notes:
2910dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2911dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2912dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2913dfb205c3SBarry Smith 
2914dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2915dfb205c3SBarry Smith 
2916dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
291769b1f4b7SBarry Smith           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2918dfb205c3SBarry Smith @*/
29197087cfbeSBarry Smith PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2920dfb205c3SBarry Smith {
2921dfb205c3SBarry Smith   PetscErrorCode ierr;
2922dfb205c3SBarry Smith 
2923dfb205c3SBarry Smith 
2924dfb205c3SBarry Smith   PetscFunctionBegin;
2925f23aa3ddSBarry Smith   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2926dfb205c3SBarry Smith   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2927dfb205c3SBarry Smith   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2928dfb205c3SBarry Smith   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2929dfb205c3SBarry Smith   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2930dfb205c3SBarry Smith   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2931dfb205c3SBarry Smith   PetscFunctionReturn(0);
2932dfb205c3SBarry Smith }
2933dfb205c3SBarry Smith 
2934dfb205c3SBarry Smith 
2935dfb205c3SBarry Smith /*@C
2936664954b6SBarry Smith    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2937dfb205c3SBarry Smith 
2938d083f849SBarry Smith    Collective
2939dfb205c3SBarry Smith 
2940dfb205c3SBarry Smith    Input Parameters:
29411c4f3114SJed Brown +  B - the matrix
2942dfb205c3SBarry Smith .  bs - the block size
2943dfb205c3SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2944dfb205c3SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2945dfb205c3SBarry Smith -  v - optional values in the matrix
2946dfb205c3SBarry Smith 
2947664954b6SBarry Smith    Level: advanced
2948664954b6SBarry Smith 
2949664954b6SBarry Smith    Notes:
29500cd7f59aSBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
29510cd7f59aSBarry Smith    and usually the numerical values as well
29520cd7f59aSBarry Smith 
295350c5228eSBarry Smith    Any entries below the diagonal are ignored
2954dfb205c3SBarry Smith 
295569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2956dfb205c3SBarry Smith @*/
29577087cfbeSBarry Smith PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2958dfb205c3SBarry Smith {
29594ac538c5SBarry Smith   PetscErrorCode ierr;
2960dfb205c3SBarry Smith 
2961dfb205c3SBarry Smith   PetscFunctionBegin;
29624ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2963dfb205c3SBarry Smith   PetscFunctionReturn(0);
2964dfb205c3SBarry Smith }
2965dfb205c3SBarry Smith 
296610c56fdeSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
29674dcd73b1SHong Zhang {
29684dcd73b1SHong Zhang   PetscErrorCode ierr;
296910c56fdeSHong Zhang   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
297010c56fdeSHong Zhang   PetscInt       *indx;
297110c56fdeSHong Zhang   PetscScalar    *values;
2972dfb205c3SBarry Smith 
29734dcd73b1SHong Zhang   PetscFunctionBegin;
29744dcd73b1SHong Zhang   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
297510c56fdeSHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
297610c56fdeSHong Zhang     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2977de25e9cbSPierre Jolivet     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
297810c56fdeSHong Zhang     PetscInt       *bindx,rmax=a->rmax,j;
2979de25e9cbSPierre Jolivet     PetscMPIInt    rank,size;
29804dcd73b1SHong Zhang 
298110c56fdeSHong Zhang     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
298210c56fdeSHong Zhang     mbs = m/bs; Nbs = N/cbs;
298310c56fdeSHong Zhang     if (n == PETSC_DECIDE) {
2984da91a574SPierre Jolivet       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
298510c56fdeSHong Zhang     }
2986da91a574SPierre Jolivet     nbs = n/cbs;
29874dcd73b1SHong Zhang 
29884dcd73b1SHong Zhang     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2989de25e9cbSPierre Jolivet     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2990de25e9cbSPierre Jolivet 
2991de25e9cbSPierre Jolivet     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2992de25e9cbSPierre Jolivet     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
2993de25e9cbSPierre Jolivet     if (rank == size-1) {
2994de25e9cbSPierre Jolivet       /* Check sum(nbs) = Nbs */
2995de25e9cbSPierre Jolivet       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2996de25e9cbSPierre Jolivet     }
2997de25e9cbSPierre Jolivet 
2998de25e9cbSPierre Jolivet     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2999e491d569SHong Zhang     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
300010c56fdeSHong Zhang     for (i=0; i<mbs; i++) {
30014dcd73b1SHong Zhang       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
30024dcd73b1SHong Zhang       nnz  = nnz/bs;
30034dcd73b1SHong Zhang       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
30044dcd73b1SHong Zhang       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
30054dcd73b1SHong Zhang       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
30064dcd73b1SHong Zhang     }
3007e491d569SHong Zhang     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
30084dcd73b1SHong Zhang     ierr = PetscFree(bindx);CHKERRQ(ierr);
30094dcd73b1SHong Zhang 
30104dcd73b1SHong Zhang     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3011de25e9cbSPierre Jolivet     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
30124dcd73b1SHong Zhang     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3013ce94fc41SPierre Jolivet     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3014ce94fc41SPierre Jolivet     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
30154dcd73b1SHong Zhang     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
30164dcd73b1SHong Zhang     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
30174dcd73b1SHong Zhang   }
30184dcd73b1SHong Zhang 
301910c56fdeSHong Zhang   /* numeric phase */
30204dcd73b1SHong Zhang   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
302110c56fdeSHong Zhang   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
30224dcd73b1SHong Zhang 
3023e491d569SHong Zhang   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
30244dcd73b1SHong Zhang   for (i=0; i<m; i++) {
30254dcd73b1SHong Zhang     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
30264dcd73b1SHong Zhang     Ii   = i + rstart;
302710c56fdeSHong Zhang     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
30284dcd73b1SHong Zhang     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
30294dcd73b1SHong Zhang   }
3030e491d569SHong Zhang   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
303110c56fdeSHong Zhang   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
303210c56fdeSHong Zhang   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
30334dcd73b1SHong Zhang   PetscFunctionReturn(0);
30344dcd73b1SHong Zhang }
3035