xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 43516a2dbae745ae94faf364ea7a6942f760589a)
12593348eSBarry Smith /*
2b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
32593348eSBarry Smith   matrix storage format.
42593348eSBarry Smith */
570f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
61a6a6d98SBarry Smith #include "src/inline/spops.h"
7325e03aeSBarry Smith #include "petscsys.h"                     /*I "petscmat.h" I*/
83b547af2SSatish Balay 
9b01c7715SBarry Smith #include "src/inline/ilu.h"
10b01c7715SBarry Smith 
11b01c7715SBarry Smith #undef __FUNCT__
12*43516a2dSKris Buschelman #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13*43516a2dSKris Buschelman /*@C
14*43516a2dSKris Buschelman   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15*43516a2dSKris Buschelman 
16*43516a2dSKris Buschelman   Collective on Mat
17*43516a2dSKris Buschelman 
18*43516a2dSKris Buschelman   Input Parameters:
19*43516a2dSKris Buschelman . mat - the matrix
20*43516a2dSKris Buschelman 
21*43516a2dSKris Buschelman   Level: advanced
22*43516a2dSKris Buschelman @*/
23*43516a2dSKris Buschelman PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
24*43516a2dSKris Buschelman {
25*43516a2dSKris Buschelman   PetscErrorCode ierr,(*f)(Mat);
26*43516a2dSKris Buschelman 
27*43516a2dSKris Buschelman   PetscFunctionBegin;
28*43516a2dSKris Buschelman   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
29*43516a2dSKris Buschelman   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30*43516a2dSKris Buschelman   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31*43516a2dSKris Buschelman 
32*43516a2dSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33*43516a2dSKris Buschelman   if (f) {
34*43516a2dSKris Buschelman     ierr = (*f)(mat);CHKERRQ(ierr);
35*43516a2dSKris Buschelman   } else {
36*43516a2dSKris Buschelman     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37*43516a2dSKris Buschelman   }
38*43516a2dSKris Buschelman   PetscFunctionReturn(0);
39*43516a2dSKris Buschelman }
40*43516a2dSKris Buschelman 
41*43516a2dSKris Buschelman EXTERN_C_BEGIN
42*43516a2dSKris Buschelman #undef __FUNCT__
43b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44dfbe8321SBarry Smith PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45b01c7715SBarry Smith {
46b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
476849ba73SBarry Smith   PetscErrorCode ierr;
48521d7252SBarry Smith   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
49b01c7715SBarry Smith   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
50b01c7715SBarry Smith 
51b01c7715SBarry Smith   PetscFunctionBegin;
52b01c7715SBarry Smith   if (a->idiagvalid) PetscFunctionReturn(0);
53b01c7715SBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
54b01c7715SBarry Smith   diag_offset = a->diag;
55b01c7715SBarry Smith   if (!a->idiag) {
56b01c7715SBarry Smith     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
57b01c7715SBarry Smith   }
58b01c7715SBarry Smith   diag  = a->idiag;
59b01c7715SBarry Smith   mdiag = a->idiag+bs*bs*mbs;
60b01c7715SBarry Smith   /* factor and invert each block */
61521d7252SBarry Smith   switch (bs){
62b01c7715SBarry Smith     case 2:
63b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
64b01c7715SBarry Smith         odiag   = v + 4*diag_offset[i];
65b01c7715SBarry Smith         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
66b01c7715SBarry Smith 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
67b01c7715SBarry Smith 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
68b01c7715SBarry Smith 	diag    += 4;
69b01c7715SBarry Smith 	mdiag   += 4;
70b01c7715SBarry Smith       }
71b01c7715SBarry Smith       break;
72b01c7715SBarry Smith     case 3:
73b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
74b01c7715SBarry Smith         odiag    = v + 9*diag_offset[i];
75b01c7715SBarry Smith         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
76b01c7715SBarry Smith         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
77b01c7715SBarry Smith         diag[8]  = odiag[8];
78b01c7715SBarry Smith         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79b01c7715SBarry Smith         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
80b01c7715SBarry Smith         mdiag[8] = odiag[8];
81b01c7715SBarry Smith 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
82b01c7715SBarry Smith 	diag    += 9;
83b01c7715SBarry Smith 	mdiag   += 9;
84b01c7715SBarry Smith       }
85b01c7715SBarry Smith       break;
86b01c7715SBarry Smith     case 4:
87b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
88b01c7715SBarry Smith         odiag  = v + 16*diag_offset[i];
89b01c7715SBarry Smith         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
90b01c7715SBarry Smith         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
91b01c7715SBarry Smith 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
92b01c7715SBarry Smith 	diag  += 16;
93b01c7715SBarry Smith 	mdiag += 16;
94b01c7715SBarry Smith       }
95b01c7715SBarry Smith       break;
96b01c7715SBarry Smith     case 5:
97b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
98b01c7715SBarry Smith         odiag = v + 25*diag_offset[i];
99b01c7715SBarry Smith         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
100b01c7715SBarry Smith         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
101b01c7715SBarry Smith 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
102b01c7715SBarry Smith 	diag  += 25;
103b01c7715SBarry Smith 	mdiag += 25;
104b01c7715SBarry Smith       }
105b01c7715SBarry Smith       break;
106b01c7715SBarry Smith     default:
107521d7252SBarry Smith       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
108b01c7715SBarry Smith   }
109b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
110b01c7715SBarry Smith   PetscFunctionReturn(0);
111b01c7715SBarry Smith }
112*43516a2dSKris Buschelman EXTERN_C_END
113b01c7715SBarry Smith 
114b01c7715SBarry Smith #undef __FUNCT__
115b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
116c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
117b01c7715SBarry Smith {
118b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
119b01c7715SBarry Smith   PetscScalar        *x,x1,x2,s1,s2;
120b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
121dfbe8321SBarry Smith   PetscErrorCode     ierr;
122c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
123c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
124b01c7715SBarry Smith 
125b01c7715SBarry Smith   PetscFunctionBegin;
126b01c7715SBarry Smith   its = its*lits;
12777431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
128b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
129b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
130b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
131b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
132b01c7715SBarry Smith 
133b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
134b01c7715SBarry Smith 
135b01c7715SBarry Smith   diag  = a->diag;
136b01c7715SBarry Smith   idiag = a->idiag;
1371ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1381ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
139b01c7715SBarry Smith 
140b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
141b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
142b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
143b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
144b01c7715SBarry Smith       i2     = 2;
145b01c7715SBarry Smith       idiag += 4;
146b01c7715SBarry Smith       for (i=1; i<m; i++) {
147b01c7715SBarry Smith 	v     = aa + 4*ai[i];
148b01c7715SBarry Smith 	vi    = aj + ai[i];
149b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
150b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1];
151b01c7715SBarry Smith 	while (nz--) {
152b01c7715SBarry Smith 	  idx  = 2*(*vi++);
153b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx];
154b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[2]*x2;
155b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[3]*x2;
156b01c7715SBarry Smith 	  v   += 4;
157b01c7715SBarry Smith 	}
158b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
159b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
160b01c7715SBarry Smith         idiag   += 4;
161b01c7715SBarry Smith         i2      += 2;
162b01c7715SBarry Smith       }
163b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
164efee365bSSatish Balay       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
165b01c7715SBarry Smith     }
166b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
167b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
168b01c7715SBarry Smith       i2    = 0;
169b01c7715SBarry Smith       mdiag = a->idiag+4*a->mbs;
170b01c7715SBarry Smith       for (i=0; i<m; i++) {
171b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1];
172b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
173b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
174b01c7715SBarry Smith         mdiag  += 4;
175b01c7715SBarry Smith         i2     += 2;
176b01c7715SBarry Smith       }
177efee365bSSatish Balay       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
178b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
179b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
180b01c7715SBarry Smith     }
181b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
182b01c7715SBarry Smith       idiag   = a->idiag+4*a->mbs - 4;
183b01c7715SBarry Smith       i2      = 2*m - 2;
184b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1];
185b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
186b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
187b01c7715SBarry Smith       idiag -= 4;
188b01c7715SBarry Smith       i2    -= 2;
189b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
190b01c7715SBarry Smith 	v     = aa + 4*(diag[i]+1);
191b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
192b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
193b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1];
194b01c7715SBarry Smith 	while (nz--) {
195b01c7715SBarry Smith 	  idx  = 2*(*vi++);
196b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx];
197b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[2]*x2;
198b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[3]*x2;
199b01c7715SBarry Smith 	  v   += 4;
200b01c7715SBarry Smith 	}
201b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
202b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
203b01c7715SBarry Smith         idiag   -= 4;
204b01c7715SBarry Smith         i2      -= 2;
205b01c7715SBarry Smith       }
206efee365bSSatish Balay       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
207b01c7715SBarry Smith     }
208b01c7715SBarry Smith   } else {
209634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
210b01c7715SBarry Smith   }
2111ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2121ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
213b01c7715SBarry Smith   PetscFunctionReturn(0);
214b01c7715SBarry Smith }
215b01c7715SBarry Smith 
216b01c7715SBarry Smith #undef __FUNCT__
217b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
218c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
219b01c7715SBarry Smith {
220b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
221b01c7715SBarry Smith   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
222b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
223dfbe8321SBarry Smith   PetscErrorCode     ierr;
224c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
225c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
226b01c7715SBarry Smith 
227b01c7715SBarry Smith   PetscFunctionBegin;
228b01c7715SBarry Smith   its = its*lits;
22977431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
233b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234b01c7715SBarry Smith 
235b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236b01c7715SBarry Smith 
237b01c7715SBarry Smith   diag  = a->diag;
238b01c7715SBarry Smith   idiag = a->idiag;
2391ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2401ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241b01c7715SBarry Smith 
242b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
243b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
245b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
246b01c7715SBarry Smith       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
247b01c7715SBarry Smith       i2     = 3;
248b01c7715SBarry Smith       idiag += 9;
249b01c7715SBarry Smith       for (i=1; i<m; i++) {
250b01c7715SBarry Smith 	v     = aa + 9*ai[i];
251b01c7715SBarry Smith 	vi    = aj + ai[i];
252b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
253b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
254b01c7715SBarry Smith 	while (nz--) {
255b01c7715SBarry Smith 	  idx  = 3*(*vi++);
256b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
257b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
258b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
259b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
260b01c7715SBarry Smith 	  v   += 9;
261b01c7715SBarry Smith 	}
262b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
263b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
264b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
265b01c7715SBarry Smith         idiag   += 9;
266b01c7715SBarry Smith         i2      += 3;
267b01c7715SBarry Smith       }
268b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
269efee365bSSatish Balay       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
270b01c7715SBarry Smith     }
271b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
272b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
273b01c7715SBarry Smith       i2    = 0;
274b01c7715SBarry Smith       mdiag = a->idiag+9*a->mbs;
275b01c7715SBarry Smith       for (i=0; i<m; i++) {
276b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
277b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
278b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
279b01c7715SBarry Smith         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
280b01c7715SBarry Smith         mdiag  += 9;
281b01c7715SBarry Smith         i2     += 3;
282b01c7715SBarry Smith       }
283efee365bSSatish Balay       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
284b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
286b01c7715SBarry Smith     }
287b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288b01c7715SBarry Smith       idiag   = a->idiag+9*a->mbs - 9;
289b01c7715SBarry Smith       i2      = 3*m - 3;
290b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
291b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
292b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
293b01c7715SBarry Smith       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
294b01c7715SBarry Smith       idiag -= 9;
295b01c7715SBarry Smith       i2    -= 3;
296b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
297b01c7715SBarry Smith 	v     = aa + 9*(diag[i]+1);
298b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
299b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
300b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
301b01c7715SBarry Smith 	while (nz--) {
302b01c7715SBarry Smith 	  idx  = 3*(*vi++);
303b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
304b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
305b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
306b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
307b01c7715SBarry Smith 	  v   += 9;
308b01c7715SBarry Smith 	}
309b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
310b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
311b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
312b01c7715SBarry Smith         idiag   -= 9;
313b01c7715SBarry Smith         i2      -= 3;
314b01c7715SBarry Smith       }
315efee365bSSatish Balay       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
316b01c7715SBarry Smith     }
317b01c7715SBarry Smith   } else {
318634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
319b01c7715SBarry Smith   }
3201ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3211ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
322b01c7715SBarry Smith   PetscFunctionReturn(0);
323b01c7715SBarry Smith }
324b01c7715SBarry Smith 
325b01c7715SBarry Smith #undef __FUNCT__
326b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
327c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
328b01c7715SBarry Smith {
329b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
330b01c7715SBarry Smith   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
331b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
332dfbe8321SBarry Smith   PetscErrorCode     ierr;
333c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
334c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
335b01c7715SBarry Smith 
336b01c7715SBarry Smith   PetscFunctionBegin;
337b01c7715SBarry Smith   its = its*lits;
33877431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
339b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
340b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
341b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
342b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
343b01c7715SBarry Smith 
344b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
345b01c7715SBarry Smith 
346b01c7715SBarry Smith   diag  = a->diag;
347b01c7715SBarry Smith   idiag = a->idiag;
3481ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3491ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
350b01c7715SBarry Smith 
351b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
352b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
353b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
354b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
355b01c7715SBarry Smith       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
356b01c7715SBarry Smith       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
357b01c7715SBarry Smith       i2     = 4;
358b01c7715SBarry Smith       idiag += 16;
359b01c7715SBarry Smith       for (i=1; i<m; i++) {
360b01c7715SBarry Smith 	v     = aa + 16*ai[i];
361b01c7715SBarry Smith 	vi    = aj + ai[i];
362b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
363b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
364b01c7715SBarry Smith 	while (nz--) {
365b01c7715SBarry Smith 	  idx  = 4*(*vi++);
366b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
367b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
368b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
369b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
370b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
371b01c7715SBarry Smith 	  v   += 16;
372b01c7715SBarry Smith 	}
373b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
374b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
375b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
376b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
377b01c7715SBarry Smith         idiag   += 16;
378b01c7715SBarry Smith         i2      += 4;
379b01c7715SBarry Smith       }
380b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
381efee365bSSatish Balay       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
382b01c7715SBarry Smith     }
383b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
384b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
385b01c7715SBarry Smith       i2    = 0;
386b01c7715SBarry Smith       mdiag = a->idiag+16*a->mbs;
387b01c7715SBarry Smith       for (i=0; i<m; i++) {
388b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
389b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
390b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
391b01c7715SBarry Smith         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
392b01c7715SBarry Smith         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
393b01c7715SBarry Smith         mdiag  += 16;
394b01c7715SBarry Smith         i2     += 4;
395b01c7715SBarry Smith       }
396efee365bSSatish Balay       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
397b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
398b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
399b01c7715SBarry Smith     }
400b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
401b01c7715SBarry Smith       idiag   = a->idiag+16*a->mbs - 16;
402b01c7715SBarry Smith       i2      = 4*m - 4;
403b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
404b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
405b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
406b01c7715SBarry Smith       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
407b01c7715SBarry Smith       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
408b01c7715SBarry Smith       idiag -= 16;
409b01c7715SBarry Smith       i2    -= 4;
410b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
411b01c7715SBarry Smith 	v     = aa + 16*(diag[i]+1);
412b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
413b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
414b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
415b01c7715SBarry Smith 	while (nz--) {
416b01c7715SBarry Smith 	  idx  = 4*(*vi++);
417b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
418b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
419b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
420b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
421b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
422b01c7715SBarry Smith 	  v   += 16;
423b01c7715SBarry Smith 	}
424b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
425b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
426b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
427b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
428b01c7715SBarry Smith         idiag   -= 16;
429b01c7715SBarry Smith         i2      -= 4;
430b01c7715SBarry Smith       }
431efee365bSSatish Balay       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
432b01c7715SBarry Smith     }
433b01c7715SBarry Smith   } else {
434634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
435b01c7715SBarry Smith   }
4361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4371ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
438b01c7715SBarry Smith   PetscFunctionReturn(0);
439b01c7715SBarry Smith }
440b01c7715SBarry Smith 
441b01c7715SBarry Smith #undef __FUNCT__
442b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
443c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
444b01c7715SBarry Smith {
445b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
446b01c7715SBarry Smith   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
447b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
448dfbe8321SBarry Smith   PetscErrorCode     ierr;
449c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
450c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
451b01c7715SBarry Smith 
452b01c7715SBarry Smith   PetscFunctionBegin;
453b01c7715SBarry Smith   its = its*lits;
45477431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
455b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
456b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
457b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
458b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
459b01c7715SBarry Smith 
460b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
461b01c7715SBarry Smith 
462b01c7715SBarry Smith   diag  = a->diag;
463b01c7715SBarry Smith   idiag = a->idiag;
4641ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4651ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
466b01c7715SBarry Smith 
467b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
468b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
469b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
470b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
471b01c7715SBarry Smith       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
472b01c7715SBarry Smith       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
473b01c7715SBarry Smith       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
474b01c7715SBarry Smith       i2     = 5;
475b01c7715SBarry Smith       idiag += 25;
476b01c7715SBarry Smith       for (i=1; i<m; i++) {
477b01c7715SBarry Smith 	v     = aa + 25*ai[i];
478b01c7715SBarry Smith 	vi    = aj + ai[i];
479b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
480b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
481b01c7715SBarry Smith 	while (nz--) {
482b01c7715SBarry Smith 	  idx  = 5*(*vi++);
483b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
484b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
485b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
486b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
487b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
488b01c7715SBarry Smith 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
489b01c7715SBarry Smith 	  v   += 25;
490b01c7715SBarry Smith 	}
491b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
492b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
493b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
494b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
495b01c7715SBarry Smith 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
496b01c7715SBarry Smith         idiag   += 25;
497b01c7715SBarry Smith         i2      += 5;
498b01c7715SBarry Smith       }
499b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
500efee365bSSatish Balay       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
501b01c7715SBarry Smith     }
502b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
503b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
504b01c7715SBarry Smith       i2    = 0;
505b01c7715SBarry Smith       mdiag = a->idiag+25*a->mbs;
506b01c7715SBarry Smith       for (i=0; i<m; i++) {
507b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
508b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
509b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
510b01c7715SBarry Smith         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
511b01c7715SBarry Smith         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
512b01c7715SBarry Smith         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
513b01c7715SBarry Smith         mdiag  += 25;
514b01c7715SBarry Smith         i2     += 5;
515b01c7715SBarry Smith       }
516efee365bSSatish Balay       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
517b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
518b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
519b01c7715SBarry Smith     }
520b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
521b01c7715SBarry Smith       idiag   = a->idiag+25*a->mbs - 25;
522b01c7715SBarry Smith       i2      = 5*m - 5;
523b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
524b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
525b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
526b01c7715SBarry Smith       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
527b01c7715SBarry Smith       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
528b01c7715SBarry Smith       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
529b01c7715SBarry Smith       idiag -= 25;
530b01c7715SBarry Smith       i2    -= 5;
531b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
532b01c7715SBarry Smith 	v     = aa + 25*(diag[i]+1);
533b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
534b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
535b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
536b01c7715SBarry Smith 	while (nz--) {
537b01c7715SBarry Smith 	  idx  = 5*(*vi++);
538b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
539b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
540b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
541b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
542b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
543b01c7715SBarry Smith 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
544b01c7715SBarry Smith 	  v   += 25;
545b01c7715SBarry Smith 	}
546b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
547b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
548b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
549b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
550b01c7715SBarry Smith 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
551b01c7715SBarry Smith         idiag   -= 25;
552b01c7715SBarry Smith         i2      -= 5;
553b01c7715SBarry Smith       }
554efee365bSSatish Balay       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
555b01c7715SBarry Smith     }
556b01c7715SBarry Smith   } else {
557634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
558b01c7715SBarry Smith   }
5591ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5601ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
561b01c7715SBarry Smith   PetscFunctionReturn(0);
562b01c7715SBarry Smith }
563b01c7715SBarry Smith 
564af674e45SBarry Smith /*
565af674e45SBarry Smith     Special version for Fun3d sequential benchmark
566af674e45SBarry Smith */
567af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
568af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
569af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
570af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
571af674e45SBarry Smith #endif
572af674e45SBarry Smith 
5739c8c1041SBarry Smith EXTERN_C_BEGIN
574af674e45SBarry Smith #undef __FUNCT__
575af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_"
576c1ac3661SBarry Smith void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
577af674e45SBarry Smith {
578af674e45SBarry Smith   Mat               A = *AA;
579af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
580c1ac3661SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
581c1ac3661SBarry Smith   PetscInt          *ai=a->i,*ailen=a->ilen;
582c1ac3661SBarry Smith   PetscInt          *aj=a->j,stepval;
583f15d580aSBarry Smith   const PetscScalar *value = v;
5844bb09213Spetsc   MatScalar         *ap,*aa = a->a,*bap;
585af674e45SBarry Smith 
586af674e45SBarry Smith   PetscFunctionBegin;
587af674e45SBarry Smith   stepval = (n-1)*4;
588af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
589af674e45SBarry Smith     row  = im[k];
590af674e45SBarry Smith     rp   = aj + ai[row];
591af674e45SBarry Smith     ap   = aa + 16*ai[row];
592af674e45SBarry Smith     nrow = ailen[row];
593af674e45SBarry Smith     low  = 0;
594af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
595af674e45SBarry Smith       col = in[l];
596af674e45SBarry Smith       value = v + k*(stepval+4)*4 + l*4;
597af674e45SBarry Smith       low = 0; high = nrow;
598af674e45SBarry Smith       while (high-low > 7) {
599af674e45SBarry Smith         t = (low+high)/2;
600af674e45SBarry Smith         if (rp[t] > col) high = t;
601af674e45SBarry Smith         else             low  = t;
602af674e45SBarry Smith       }
603af674e45SBarry Smith       for (i=low; i<high; i++) {
604af674e45SBarry Smith         if (rp[i] > col) break;
605af674e45SBarry Smith         if (rp[i] == col) {
606af674e45SBarry Smith           bap  = ap +  16*i;
607af674e45SBarry Smith           for (ii=0; ii<4; ii++,value+=stepval) {
608af674e45SBarry Smith             for (jj=ii; jj<16; jj+=4) {
609af674e45SBarry Smith               bap[jj] += *value++;
610af674e45SBarry Smith             }
611af674e45SBarry Smith           }
612af674e45SBarry Smith           goto noinsert2;
613af674e45SBarry Smith         }
614af674e45SBarry Smith       }
615af674e45SBarry Smith       N = nrow++ - 1;
616af674e45SBarry Smith       /* shift up all the later entries in this row */
617af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
618af674e45SBarry Smith         rp[ii+1] = rp[ii];
619a037b02bSBarry Smith         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
620af674e45SBarry Smith       }
621af674e45SBarry Smith       if (N >= i) {
622a037b02bSBarry Smith         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
623af674e45SBarry Smith       }
624af674e45SBarry Smith       rp[i] = col;
625af674e45SBarry Smith       bap   = ap +  16*i;
626af674e45SBarry Smith       for (ii=0; ii<4; ii++,value+=stepval) {
627af674e45SBarry Smith         for (jj=ii; jj<16; jj+=4) {
628af674e45SBarry Smith           bap[jj] = *value++;
629af674e45SBarry Smith         }
630af674e45SBarry Smith       }
631af674e45SBarry Smith       noinsert2:;
632af674e45SBarry Smith       low = i;
633af674e45SBarry Smith     }
634af674e45SBarry Smith     ailen[row] = nrow;
635af674e45SBarry Smith   }
636af674e45SBarry Smith }
6379c8c1041SBarry Smith EXTERN_C_END
638af674e45SBarry Smith 
639af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
640af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
641af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
642af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
643af674e45SBarry Smith #endif
644af674e45SBarry Smith 
6459c8c1041SBarry Smith EXTERN_C_BEGIN
646af674e45SBarry Smith #undef __FUNCT__
647af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_"
648c1ac3661SBarry Smith void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
649af674e45SBarry Smith {
650af674e45SBarry Smith   Mat         A = *AA;
651af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
652c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
653c1ac3661SBarry Smith   PetscInt    *ai=a->i,*ailen=a->ilen;
654c1ac3661SBarry Smith   PetscInt    *aj=a->j,brow,bcol;
655c1ac3661SBarry Smith   PetscInt    ridx,cidx;
656af674e45SBarry Smith   MatScalar   *ap,value,*aa=a->a,*bap;
657af674e45SBarry Smith 
658af674e45SBarry Smith   PetscFunctionBegin;
659af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
660af674e45SBarry Smith     row  = im[k]; brow = row/4;
661af674e45SBarry Smith     rp   = aj + ai[brow];
662af674e45SBarry Smith     ap   = aa + 16*ai[brow];
663af674e45SBarry Smith     nrow = ailen[brow];
664af674e45SBarry Smith     low  = 0;
665af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
666af674e45SBarry Smith       col = in[l]; bcol = col/4;
667af674e45SBarry Smith       ridx = row % 4; cidx = col % 4;
668af674e45SBarry Smith       value = v[l + k*n];
669af674e45SBarry Smith       low = 0; high = nrow;
670af674e45SBarry Smith       while (high-low > 7) {
671af674e45SBarry Smith         t = (low+high)/2;
672af674e45SBarry Smith         if (rp[t] > bcol) high = t;
673af674e45SBarry Smith         else              low  = t;
674af674e45SBarry Smith       }
675af674e45SBarry Smith       for (i=low; i<high; i++) {
676af674e45SBarry Smith         if (rp[i] > bcol) break;
677af674e45SBarry Smith         if (rp[i] == bcol) {
678af674e45SBarry Smith           bap  = ap +  16*i + 4*cidx + ridx;
679af674e45SBarry Smith           *bap += value;
680af674e45SBarry Smith           goto noinsert1;
681af674e45SBarry Smith         }
682af674e45SBarry Smith       }
683af674e45SBarry Smith       N = nrow++ - 1;
684af674e45SBarry Smith       /* shift up all the later entries in this row */
685af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
686af674e45SBarry Smith         rp[ii+1] = rp[ii];
687a037b02bSBarry Smith         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
688af674e45SBarry Smith       }
689af674e45SBarry Smith       if (N>=i) {
690a037b02bSBarry Smith         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
691af674e45SBarry Smith       }
692af674e45SBarry Smith       rp[i]                    = bcol;
693af674e45SBarry Smith       ap[16*i + 4*cidx + ridx] = value;
694af674e45SBarry Smith       noinsert1:;
695af674e45SBarry Smith       low = i;
696af674e45SBarry Smith     }
697af674e45SBarry Smith     ailen[brow] = nrow;
698af674e45SBarry Smith   }
699af674e45SBarry Smith }
7009c8c1041SBarry Smith EXTERN_C_END
701af674e45SBarry Smith 
70295d5f7c2SBarry Smith /*  UGLY, ugly, ugly
70387828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
7043477af42SKris Buschelman    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
70595d5f7c2SBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
70695d5f7c2SBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
70795d5f7c2SBarry Smith    into the single precision data structures.
70895d5f7c2SBarry Smith */
70995d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
710690b6cddSBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
71195d5f7c2SBarry Smith #else
71295d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
71395d5f7c2SBarry Smith #endif
71495d5f7c2SBarry Smith 
7152d61bbb3SSatish Balay #define CHUNKSIZE  10
716de6a44a3SBarry Smith 
717be5855fcSBarry Smith /*
718be5855fcSBarry Smith      Checks for missing diagonals
719be5855fcSBarry Smith */
7204a2ae208SSatish Balay #undef __FUNCT__
7214a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
722dfbe8321SBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
723be5855fcSBarry Smith {
724be5855fcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
7256849ba73SBarry Smith   PetscErrorCode ierr;
726c1ac3661SBarry Smith   PetscInt       *diag,*jj = a->j,i;
727be5855fcSBarry Smith 
728be5855fcSBarry Smith   PetscFunctionBegin;
729c4992f7dSBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
730883fce79SBarry Smith   diag = a->diag;
7310e8e8aceSBarry Smith   for (i=0; i<a->mbs; i++) {
732be5855fcSBarry Smith     if (jj[diag[i]] != i) {
73377431f27SBarry Smith       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
734be5855fcSBarry Smith     }
735be5855fcSBarry Smith   }
736be5855fcSBarry Smith   PetscFunctionReturn(0);
737be5855fcSBarry Smith }
738be5855fcSBarry Smith 
7394a2ae208SSatish Balay #undef __FUNCT__
7404a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
741dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
742de6a44a3SBarry Smith {
743de6a44a3SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
7446849ba73SBarry Smith   PetscErrorCode ierr;
745c1ac3661SBarry Smith   PetscInt       i,j,*diag,m = a->mbs;
746de6a44a3SBarry Smith 
7473a40ed3dSBarry Smith   PetscFunctionBegin;
748883fce79SBarry Smith   if (a->diag) PetscFunctionReturn(0);
749883fce79SBarry Smith 
750c1ac3661SBarry Smith   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
75152e6d16bSBarry Smith   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
7527fc0212eSBarry Smith   for (i=0; i<m; i++) {
753ecc615b2SBarry Smith     diag[i] = a->i[i+1];
754de6a44a3SBarry Smith     for (j=a->i[i]; j<a->i[i+1]; j++) {
755de6a44a3SBarry Smith       if (a->j[j] == i) {
756de6a44a3SBarry Smith         diag[i] = j;
757de6a44a3SBarry Smith         break;
758de6a44a3SBarry Smith       }
759de6a44a3SBarry Smith     }
760de6a44a3SBarry Smith   }
761de6a44a3SBarry Smith   a->diag = diag;
7623a40ed3dSBarry Smith   PetscFunctionReturn(0);
763de6a44a3SBarry Smith }
7642593348eSBarry Smith 
7652593348eSBarry Smith 
766690b6cddSBarry Smith EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
7673b2fbd54SBarry Smith 
7684a2ae208SSatish Balay #undef __FUNCT__
7694a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
770c1ac3661SBarry Smith static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
7713b2fbd54SBarry Smith {
7723b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
773dfbe8321SBarry Smith   PetscErrorCode ierr;
774c1ac3661SBarry Smith   PetscInt       n = a->mbs,i;
7753b2fbd54SBarry Smith 
7763a40ed3dSBarry Smith   PetscFunctionBegin;
7773b2fbd54SBarry Smith   *nn = n;
7783a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
7793b2fbd54SBarry Smith   if (symmetric) {
7803b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
7813b2fbd54SBarry Smith   } else if (oshift == 1) {
7823b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
783c1ac3661SBarry Smith     PetscInt nz = a->i[n];
7843b2fbd54SBarry Smith     for (i=0; i<nz; i++) a->j[i]++;
7853b2fbd54SBarry Smith     for (i=0; i<n+1; i++) a->i[i]++;
7863b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
7873b2fbd54SBarry Smith   } else {
7883b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
7893b2fbd54SBarry Smith   }
7903b2fbd54SBarry Smith 
7913a40ed3dSBarry Smith   PetscFunctionReturn(0);
7923b2fbd54SBarry Smith }
7933b2fbd54SBarry Smith 
7944a2ae208SSatish Balay #undef __FUNCT__
7954a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
796c1ac3661SBarry Smith static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
7973b2fbd54SBarry Smith {
7983b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
7996849ba73SBarry Smith   PetscErrorCode ierr;
800c1ac3661SBarry Smith   PetscInt       i,n = a->mbs;
8013b2fbd54SBarry Smith 
8023a40ed3dSBarry Smith   PetscFunctionBegin;
8033a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
8043b2fbd54SBarry Smith   if (symmetric) {
805606d414cSSatish Balay     ierr = PetscFree(*ia);CHKERRQ(ierr);
806606d414cSSatish Balay     ierr = PetscFree(*ja);CHKERRQ(ierr);
807af5da2bfSBarry Smith   } else if (oshift == 1) {
808c1ac3661SBarry Smith     PetscInt nz = a->i[n]-1;
8093b2fbd54SBarry Smith     for (i=0; i<nz; i++) a->j[i]--;
8103b2fbd54SBarry Smith     for (i=0; i<n+1; i++) a->i[i]--;
8113b2fbd54SBarry Smith   }
8123a40ed3dSBarry Smith   PetscFunctionReturn(0);
8133b2fbd54SBarry Smith }
8143b2fbd54SBarry Smith 
8154a2ae208SSatish Balay #undef __FUNCT__
8164a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ"
817dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
8182d61bbb3SSatish Balay {
8192d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
820dfbe8321SBarry Smith   PetscErrorCode ierr;
8212d61bbb3SSatish Balay 
822433994e6SBarry Smith   PetscFunctionBegin;
823aa482453SBarry Smith #if defined(PETSC_USE_LOG)
82477431f27SBarry Smith   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
8252d61bbb3SSatish Balay #endif
826606d414cSSatish Balay   ierr = PetscFree(a->a);CHKERRQ(ierr);
827606d414cSSatish Balay   if (!a->singlemalloc) {
828606d414cSSatish Balay     ierr = PetscFree(a->i);CHKERRQ(ierr);
829606d414cSSatish Balay     ierr = PetscFree(a->j);CHKERRQ(ierr);
830606d414cSSatish Balay   }
831c38d4ed2SBarry Smith   if (a->row) {
832c38d4ed2SBarry Smith     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833c38d4ed2SBarry Smith   }
834c38d4ed2SBarry Smith   if (a->col) {
835c38d4ed2SBarry Smith     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836c38d4ed2SBarry Smith   }
837606d414cSSatish Balay   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838a1d92eedSBarry Smith   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839606d414cSSatish Balay   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
840606d414cSSatish Balay   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
841606d414cSSatish Balay   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
842606d414cSSatish Balay   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
843e51c0b9cSSatish Balay   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
844606d414cSSatish Balay   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
845563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
846563b5814SBarry Smith   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
847563b5814SBarry Smith #endif
848c4319e64SHong Zhang   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
84973e7a558SHong Zhang   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
850c4319e64SHong Zhang 
851606d414cSSatish Balay   ierr = PetscFree(a);CHKERRQ(ierr);
852901853e0SKris Buschelman 
853*43516a2dSKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
854901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
855901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
856901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
857901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
858901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
859901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
8602d61bbb3SSatish Balay   PetscFunctionReturn(0);
8612d61bbb3SSatish Balay }
8622d61bbb3SSatish Balay 
8634a2ae208SSatish Balay #undef __FUNCT__
8644a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ"
865dfbe8321SBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
8662d61bbb3SSatish Balay {
8672d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
86863ba0a88SBarry Smith   PetscErrorCode ierr;
8692d61bbb3SSatish Balay 
8702d61bbb3SSatish Balay   PetscFunctionBegin;
871aa275fccSKris Buschelman   switch (op) {
872aa275fccSKris Buschelman   case MAT_ROW_ORIENTED:
873aa275fccSKris Buschelman     a->roworiented    = PETSC_TRUE;
874aa275fccSKris Buschelman     break;
875aa275fccSKris Buschelman   case MAT_COLUMN_ORIENTED:
876aa275fccSKris Buschelman     a->roworiented    = PETSC_FALSE;
877aa275fccSKris Buschelman     break;
878aa275fccSKris Buschelman   case MAT_COLUMNS_SORTED:
879aa275fccSKris Buschelman     a->sorted         = PETSC_TRUE;
880aa275fccSKris Buschelman     break;
881aa275fccSKris Buschelman   case MAT_COLUMNS_UNSORTED:
882aa275fccSKris Buschelman     a->sorted         = PETSC_FALSE;
883aa275fccSKris Buschelman     break;
884aa275fccSKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
885aa275fccSKris Buschelman     a->keepzeroedrows = PETSC_TRUE;
886aa275fccSKris Buschelman     break;
887aa275fccSKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
888aa275fccSKris Buschelman     a->nonew          = 1;
889aa275fccSKris Buschelman     break;
890aa275fccSKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
891aa275fccSKris Buschelman     a->nonew          = -1;
892aa275fccSKris Buschelman     break;
893aa275fccSKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
894aa275fccSKris Buschelman     a->nonew          = -2;
895aa275fccSKris Buschelman     break;
896aa275fccSKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
897aa275fccSKris Buschelman     a->nonew          = 0;
898aa275fccSKris Buschelman     break;
899aa275fccSKris Buschelman   case MAT_ROWS_SORTED:
900aa275fccSKris Buschelman   case MAT_ROWS_UNSORTED:
901aa275fccSKris Buschelman   case MAT_YES_NEW_DIAGONALS:
902aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
903aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
90463ba0a88SBarry Smith     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
905aa275fccSKris Buschelman     break;
906aa275fccSKris Buschelman   case MAT_NO_NEW_DIAGONALS:
90729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
90877e54ba9SKris Buschelman   case MAT_SYMMETRIC:
90977e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
9109a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
9119a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
9129a4540c5SBarry Smith   case MAT_HERMITIAN:
9139a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
9149a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
9159a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
91677e54ba9SKris Buschelman     break;
917aa275fccSKris Buschelman   default:
91829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
9192d61bbb3SSatish Balay   }
9202d61bbb3SSatish Balay   PetscFunctionReturn(0);
9212d61bbb3SSatish Balay }
9222d61bbb3SSatish Balay 
9234a2ae208SSatish Balay #undef __FUNCT__
9244a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ"
925c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
9262d61bbb3SSatish Balay {
9272d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
9286849ba73SBarry Smith   PetscErrorCode ierr;
929c1ac3661SBarry Smith   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
9303f1db9ecSBarry Smith   MatScalar      *aa,*aa_i;
93187828ca2SBarry Smith   PetscScalar    *v_i;
9322d61bbb3SSatish Balay 
9332d61bbb3SSatish Balay   PetscFunctionBegin;
934521d7252SBarry Smith   bs  = A->bs;
9352d61bbb3SSatish Balay   ai  = a->i;
9362d61bbb3SSatish Balay   aj  = a->j;
9372d61bbb3SSatish Balay   aa  = a->a;
9382d61bbb3SSatish Balay   bs2 = a->bs2;
9392d61bbb3SSatish Balay 
94077431f27SBarry Smith   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
9412d61bbb3SSatish Balay 
9422d61bbb3SSatish Balay   bn  = row/bs;   /* Block number */
9432d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
9442d61bbb3SSatish Balay   M   = ai[bn+1] - ai[bn];
9452d61bbb3SSatish Balay   *nz = bs*M;
9462d61bbb3SSatish Balay 
9472d61bbb3SSatish Balay   if (v) {
9482d61bbb3SSatish Balay     *v = 0;
9492d61bbb3SSatish Balay     if (*nz) {
95087828ca2SBarry Smith       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
9512d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
9522d61bbb3SSatish Balay         v_i  = *v + i*bs;
9532d61bbb3SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
9542d61bbb3SSatish Balay         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
9552d61bbb3SSatish Balay       }
9562d61bbb3SSatish Balay     }
9572d61bbb3SSatish Balay   }
9582d61bbb3SSatish Balay 
9592d61bbb3SSatish Balay   if (idx) {
9602d61bbb3SSatish Balay     *idx = 0;
9612d61bbb3SSatish Balay     if (*nz) {
962c1ac3661SBarry Smith       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
9632d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
9642d61bbb3SSatish Balay         idx_i = *idx + i*bs;
9652d61bbb3SSatish Balay         itmp  = bs*aj[ai[bn] + i];
9662d61bbb3SSatish Balay         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
9672d61bbb3SSatish Balay       }
9682d61bbb3SSatish Balay     }
9692d61bbb3SSatish Balay   }
9702d61bbb3SSatish Balay   PetscFunctionReturn(0);
9712d61bbb3SSatish Balay }
9722d61bbb3SSatish Balay 
9734a2ae208SSatish Balay #undef __FUNCT__
9744a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
975c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
9762d61bbb3SSatish Balay {
977dfbe8321SBarry Smith   PetscErrorCode ierr;
978606d414cSSatish Balay 
9792d61bbb3SSatish Balay   PetscFunctionBegin;
980606d414cSSatish Balay   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
981606d414cSSatish Balay   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
9822d61bbb3SSatish Balay   PetscFunctionReturn(0);
9832d61bbb3SSatish Balay }
9842d61bbb3SSatish Balay 
9854a2ae208SSatish Balay #undef __FUNCT__
9864a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ"
987dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
9882d61bbb3SSatish Balay {
9892d61bbb3SSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
9902d61bbb3SSatish Balay   Mat            C;
9916849ba73SBarry Smith   PetscErrorCode ierr;
992521d7252SBarry Smith   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
993c1ac3661SBarry Smith   PetscInt       *rows,*cols,bs2=a->bs2;
99487828ca2SBarry Smith   PetscScalar    *array;
9952d61bbb3SSatish Balay 
9962d61bbb3SSatish Balay   PetscFunctionBegin;
99729bbc08cSBarry Smith   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
998c1ac3661SBarry Smith   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
999c1ac3661SBarry Smith   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
10002d61bbb3SSatish Balay 
1001375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
100287828ca2SBarry Smith   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
100387828ca2SBarry Smith   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1004375fe846SBarry Smith #else
10053eda8832SBarry Smith   array = a->a;
1006375fe846SBarry Smith #endif
1007375fe846SBarry Smith 
10082d61bbb3SSatish Balay   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1009f204ca49SKris Buschelman   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
1010f204ca49SKris Buschelman   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1011f204ca49SKris Buschelman   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1012606d414cSSatish Balay   ierr = PetscFree(col);CHKERRQ(ierr);
1013c1ac3661SBarry Smith   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
10142d61bbb3SSatish Balay   cols = rows + bs;
10152d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
10162d61bbb3SSatish Balay     cols[0] = i*bs;
10172d61bbb3SSatish Balay     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
10182d61bbb3SSatish Balay     len = ai[i+1] - ai[i];
10192d61bbb3SSatish Balay     for (j=0; j<len; j++) {
10202d61bbb3SSatish Balay       rows[0] = (*aj++)*bs;
10212d61bbb3SSatish Balay       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
10222d61bbb3SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
10232d61bbb3SSatish Balay       array += bs2;
10242d61bbb3SSatish Balay     }
10252d61bbb3SSatish Balay   }
1026606d414cSSatish Balay   ierr = PetscFree(rows);CHKERRQ(ierr);
1027375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
1028375fe846SBarry Smith   ierr = PetscFree(array);
1029375fe846SBarry Smith #endif
10302d61bbb3SSatish Balay 
10312d61bbb3SSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10322d61bbb3SSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10332d61bbb3SSatish Balay 
1034c4992f7dSBarry Smith   if (B) {
10352d61bbb3SSatish Balay     *B = C;
10362d61bbb3SSatish Balay   } else {
1037273d9f13SBarry Smith     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
10382d61bbb3SSatish Balay   }
10392d61bbb3SSatish Balay   PetscFunctionReturn(0);
10402d61bbb3SSatish Balay }
10412d61bbb3SSatish Balay 
10424a2ae208SSatish Balay #undef __FUNCT__
10434a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary"
10446849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
10452593348eSBarry Smith {
1046b6490206SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
10476849ba73SBarry Smith   PetscErrorCode ierr;
1048521d7252SBarry Smith   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1049b24ad042SBarry Smith   int            fd;
105087828ca2SBarry Smith   PetscScalar    *aa;
1051ce6f0cecSBarry Smith   FILE           *file;
10522593348eSBarry Smith 
10533a40ed3dSBarry Smith   PetscFunctionBegin;
1054b0a32e0cSBarry Smith   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1055c1ac3661SBarry Smith   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1056552e946dSBarry Smith   col_lens[0] = MAT_FILE_COOKIE;
10573b2fbd54SBarry Smith 
1058273d9f13SBarry Smith   col_lens[1] = A->m;
1059273d9f13SBarry Smith   col_lens[2] = A->n;
10607e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
10612593348eSBarry Smith 
10622593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
1063b6490206SBarry Smith   count = 0;
1064b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1065b6490206SBarry Smith     for (j=0; j<bs; j++) {
1066b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1067b6490206SBarry Smith     }
10682593348eSBarry Smith   }
10696f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1070606d414cSSatish Balay   ierr = PetscFree(col_lens);CHKERRQ(ierr);
10712593348eSBarry Smith 
10722593348eSBarry Smith   /* store column indices (zero start index) */
1073c1ac3661SBarry Smith   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1074b6490206SBarry Smith   count = 0;
1075b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1076b6490206SBarry Smith     for (j=0; j<bs; j++) {
1077b6490206SBarry Smith       for (k=a->i[i]; k<a->i[i+1]; k++) {
1078b6490206SBarry Smith         for (l=0; l<bs; l++) {
1079b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
10802593348eSBarry Smith         }
10812593348eSBarry Smith       }
1082b6490206SBarry Smith     }
1083b6490206SBarry Smith   }
10846f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1085606d414cSSatish Balay   ierr = PetscFree(jj);CHKERRQ(ierr);
10862593348eSBarry Smith 
10872593348eSBarry Smith   /* store nonzero values */
108887828ca2SBarry Smith   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1089b6490206SBarry Smith   count = 0;
1090b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1091b6490206SBarry Smith     for (j=0; j<bs; j++) {
1092b6490206SBarry Smith       for (k=a->i[i]; k<a->i[i+1]; k++) {
1093b6490206SBarry Smith         for (l=0; l<bs; l++) {
10947e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
1095b6490206SBarry Smith         }
1096b6490206SBarry Smith       }
1097b6490206SBarry Smith     }
1098b6490206SBarry Smith   }
10996f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1100606d414cSSatish Balay   ierr = PetscFree(aa);CHKERRQ(ierr);
1101ce6f0cecSBarry Smith 
1102b0a32e0cSBarry Smith   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1103ce6f0cecSBarry Smith   if (file) {
1104521d7252SBarry Smith     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1105ce6f0cecSBarry Smith   }
11063a40ed3dSBarry Smith   PetscFunctionReturn(0);
11072593348eSBarry Smith }
11082593348eSBarry Smith 
11094a2ae208SSatish Balay #undef __FUNCT__
11104a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
11116849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
11122593348eSBarry Smith {
1113b6490206SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1114dfbe8321SBarry Smith   PetscErrorCode    ierr;
1115521d7252SBarry Smith   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1116f3ef73ceSBarry Smith   PetscViewerFormat format;
11172593348eSBarry Smith 
11183a40ed3dSBarry Smith   PetscFunctionBegin;
1119b0a32e0cSBarry Smith   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1120456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
112177431f27SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1122fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1123bcd9e38bSBarry Smith     Mat aij;
1124ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1125bcd9e38bSBarry Smith     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1126bcd9e38bSBarry Smith     ierr = MatDestroy(aij);CHKERRQ(ierr);
112704929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
112804929863SHong Zhang      PetscFunctionReturn(0);
1129fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1130b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
113144cd7ae7SLois Curfman McInnes     for (i=0; i<a->mbs; i++) {
113244cd7ae7SLois Curfman McInnes       for (j=0; j<bs; j++) {
113377431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
113444cd7ae7SLois Curfman McInnes         for (k=a->i[i]; k<a->i[i+1]; k++) {
113544cd7ae7SLois Curfman McInnes           for (l=0; l<bs; l++) {
1136aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
11370e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
113877431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
11390e6d2581SBarry Smith                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11400e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
114177431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
11420e6d2581SBarry Smith                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11430e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
114477431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11450ef38995SBarry Smith             }
114644cd7ae7SLois Curfman McInnes #else
11470ef38995SBarry Smith             if (a->a[bs2*k + l*bs + j] != 0.0) {
114877431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
11490ef38995SBarry Smith             }
115044cd7ae7SLois Curfman McInnes #endif
115144cd7ae7SLois Curfman McInnes           }
115244cd7ae7SLois Curfman McInnes         }
1153b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
115444cd7ae7SLois Curfman McInnes       }
115544cd7ae7SLois Curfman McInnes     }
1156b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
11570ef38995SBarry Smith   } else {
1158b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1159b6490206SBarry Smith     for (i=0; i<a->mbs; i++) {
1160b6490206SBarry Smith       for (j=0; j<bs; j++) {
116177431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1162b6490206SBarry Smith         for (k=a->i[i]; k<a->i[i+1]; k++) {
1163b6490206SBarry Smith           for (l=0; l<bs; l++) {
1164aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
11650e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
116677431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
11670e6d2581SBarry Smith                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11680e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
116977431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
11700e6d2581SBarry Smith                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11710ef38995SBarry Smith             } else {
117277431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
117388685aaeSLois Curfman McInnes             }
117488685aaeSLois Curfman McInnes #else
117577431f27SBarry Smith             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
117688685aaeSLois Curfman McInnes #endif
11772593348eSBarry Smith           }
11782593348eSBarry Smith         }
1179b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
11802593348eSBarry Smith       }
11812593348eSBarry Smith     }
1182b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1183b6490206SBarry Smith   }
1184b0a32e0cSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
11853a40ed3dSBarry Smith   PetscFunctionReturn(0);
11862593348eSBarry Smith }
11872593348eSBarry Smith 
11884a2ae208SSatish Balay #undef __FUNCT__
11894a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
11906849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
11913270192aSSatish Balay {
119277ed5343SBarry Smith   Mat            A = (Mat) Aa;
11933270192aSSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
11946849ba73SBarry Smith   PetscErrorCode ierr;
1195521d7252SBarry Smith   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
11960e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
11973f1db9ecSBarry Smith   MatScalar      *aa;
1198b0a32e0cSBarry Smith   PetscViewer    viewer;
11993270192aSSatish Balay 
12003a40ed3dSBarry Smith   PetscFunctionBegin;
12013270192aSSatish Balay 
1202b65db4caSBarry Smith   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
120377ed5343SBarry Smith   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
120477ed5343SBarry Smith 
1205b0a32e0cSBarry Smith   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
120677ed5343SBarry Smith 
12073270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1208b0a32e0cSBarry Smith   color = PETSC_DRAW_BLUE;
12093270192aSSatish Balay   for (i=0,row=0; i<mbs; i++,row+=bs) {
12103270192aSSatish Balay     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211273d9f13SBarry Smith       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
12123270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
12133270192aSSatish Balay       aa = a->a + j*bs2;
12143270192aSSatish Balay       for (k=0; k<bs; k++) {
12153270192aSSatish Balay         for (l=0; l<bs; l++) {
12160e6d2581SBarry Smith           if (PetscRealPart(*aa++) >=  0.) continue;
1217b0a32e0cSBarry Smith           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
12183270192aSSatish Balay         }
12193270192aSSatish Balay       }
12203270192aSSatish Balay     }
12213270192aSSatish Balay   }
1222b0a32e0cSBarry Smith   color = PETSC_DRAW_CYAN;
12233270192aSSatish Balay   for (i=0,row=0; i<mbs; i++,row+=bs) {
12243270192aSSatish Balay     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225273d9f13SBarry Smith       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
12263270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
12273270192aSSatish Balay       aa = a->a + j*bs2;
12283270192aSSatish Balay       for (k=0; k<bs; k++) {
12293270192aSSatish Balay         for (l=0; l<bs; l++) {
12300e6d2581SBarry Smith           if (PetscRealPart(*aa++) != 0.) continue;
1231b0a32e0cSBarry Smith           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
12323270192aSSatish Balay         }
12333270192aSSatish Balay       }
12343270192aSSatish Balay     }
12353270192aSSatish Balay   }
12363270192aSSatish Balay 
1237b0a32e0cSBarry Smith   color = PETSC_DRAW_RED;
12383270192aSSatish Balay   for (i=0,row=0; i<mbs; i++,row+=bs) {
12393270192aSSatish Balay     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240273d9f13SBarry Smith       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
12413270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
12423270192aSSatish Balay       aa = a->a + j*bs2;
12433270192aSSatish Balay       for (k=0; k<bs; k++) {
12443270192aSSatish Balay         for (l=0; l<bs; l++) {
12450e6d2581SBarry Smith           if (PetscRealPart(*aa++) <= 0.) continue;
1246b0a32e0cSBarry Smith           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
12473270192aSSatish Balay         }
12483270192aSSatish Balay       }
12493270192aSSatish Balay     }
12503270192aSSatish Balay   }
125177ed5343SBarry Smith   PetscFunctionReturn(0);
125277ed5343SBarry Smith }
12533270192aSSatish Balay 
12544a2ae208SSatish Balay #undef __FUNCT__
12554a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw"
12566849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
125777ed5343SBarry Smith {
1258dfbe8321SBarry Smith   PetscErrorCode ierr;
12590e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,w,h;
1260b0a32e0cSBarry Smith   PetscDraw      draw;
126177ed5343SBarry Smith   PetscTruth     isnull;
12623270192aSSatish Balay 
126377ed5343SBarry Smith   PetscFunctionBegin;
126477ed5343SBarry Smith 
1265b0a32e0cSBarry Smith   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1266b0a32e0cSBarry Smith   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
126777ed5343SBarry Smith 
126877ed5343SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1269273d9f13SBarry Smith   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
127077ed5343SBarry Smith   xr += w;    yr += h;  xl = -w;     yl = -h;
1271b0a32e0cSBarry Smith   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1272b0a32e0cSBarry Smith   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
127377ed5343SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
12743a40ed3dSBarry Smith   PetscFunctionReturn(0);
12753270192aSSatish Balay }
12763270192aSSatish Balay 
12774a2ae208SSatish Balay #undef __FUNCT__
12784a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ"
1279dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
12802593348eSBarry Smith {
1281dfbe8321SBarry Smith   PetscErrorCode ierr;
128232077d6dSBarry Smith   PetscTruth     iascii,isbinary,isdraw;
12832593348eSBarry Smith 
12843a40ed3dSBarry Smith   PetscFunctionBegin;
128532077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1286fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1287fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
128832077d6dSBarry Smith   if (iascii){
12893a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
12900f5bd95cSBarry Smith   } else if (isbinary) {
12913a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
12920f5bd95cSBarry Smith   } else if (isdraw) {
12933a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
12945cd90555SBarry Smith   } else {
1295a5e6ed63SBarry Smith     Mat B;
1296ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1297a5e6ed63SBarry Smith     ierr = MatView(B,viewer);CHKERRQ(ierr);
1298a5e6ed63SBarry Smith     ierr = MatDestroy(B);CHKERRQ(ierr);
12992593348eSBarry Smith   }
13003a40ed3dSBarry Smith   PetscFunctionReturn(0);
13012593348eSBarry Smith }
1302b6490206SBarry Smith 
1303cd0e1443SSatish Balay 
13044a2ae208SSatish Balay #undef __FUNCT__
13054a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ"
1306c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1307cd0e1443SSatish Balay {
1308cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1309c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1310c1ac3661SBarry Smith   PetscInt    *ai = a->i,*ailen = a->ilen;
1311521d7252SBarry Smith   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
13123f1db9ecSBarry Smith   MatScalar   *ap,*aa = a->a,zero = 0.0;
1313cd0e1443SSatish Balay 
13143a40ed3dSBarry Smith   PetscFunctionBegin;
13152d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over rows */
1316cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
131729bbc08cSBarry Smith     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
131877431f27SBarry Smith     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
13192d61bbb3SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
13202c3acbe9SBarry Smith     nrow = ailen[brow];
13212d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over columns */
132229bbc08cSBarry Smith       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
132377431f27SBarry Smith       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
13242d61bbb3SSatish Balay       col  = in[l] ;
13252d61bbb3SSatish Balay       bcol = col/bs;
13262d61bbb3SSatish Balay       cidx = col%bs;
13272d61bbb3SSatish Balay       ridx = row%bs;
13282d61bbb3SSatish Balay       high = nrow;
13292d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
13302d61bbb3SSatish Balay       while (high-low > 5) {
1331cd0e1443SSatish Balay         t = (low+high)/2;
1332cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
1333cd0e1443SSatish Balay         else             low  = t;
1334cd0e1443SSatish Balay       }
1335cd0e1443SSatish Balay       for (i=low; i<high; i++) {
1336cd0e1443SSatish Balay         if (rp[i] > bcol) break;
1337cd0e1443SSatish Balay         if (rp[i] == bcol) {
13382d61bbb3SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
13392d61bbb3SSatish Balay           goto finished;
1340cd0e1443SSatish Balay         }
1341cd0e1443SSatish Balay       }
13422d61bbb3SSatish Balay       *v++ = zero;
13432d61bbb3SSatish Balay       finished:;
1344cd0e1443SSatish Balay     }
1345cd0e1443SSatish Balay   }
13463a40ed3dSBarry Smith   PetscFunctionReturn(0);
1347cd0e1443SSatish Balay }
1348cd0e1443SSatish Balay 
134995d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
13504a2ae208SSatish Balay #undef __FUNCT__
13514a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1352c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
135395d5f7c2SBarry Smith {
1354563b5814SBarry Smith   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1355dfbe8321SBarry Smith   PetscErrorCode ierr;
1356c1ac3661SBarry Smith   PetscInt       i,N = m*n*b->bs2;
1357563b5814SBarry Smith   MatScalar      *vsingle;
135895d5f7c2SBarry Smith 
135995d5f7c2SBarry Smith   PetscFunctionBegin;
1360563b5814SBarry Smith   if (N > b->setvalueslen) {
1361563b5814SBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1362b0a32e0cSBarry Smith     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1363563b5814SBarry Smith     b->setvalueslen  = N;
1364563b5814SBarry Smith   }
1365563b5814SBarry Smith   vsingle = b->setvaluescopy;
136695d5f7c2SBarry Smith   for (i=0; i<N; i++) {
136795d5f7c2SBarry Smith     vsingle[i] = v[i];
136895d5f7c2SBarry Smith   }
136995d5f7c2SBarry Smith   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
137095d5f7c2SBarry Smith   PetscFunctionReturn(0);
137195d5f7c2SBarry Smith }
137295d5f7c2SBarry Smith #endif
137395d5f7c2SBarry Smith 
13742d61bbb3SSatish Balay 
13754a2ae208SSatish Balay #undef __FUNCT__
13764a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1377c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
137892c4ed94SBarry Smith {
137992c4ed94SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1380e2ee6c50SBarry Smith   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1381c1ac3661SBarry Smith   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
13826849ba73SBarry Smith   PetscErrorCode  ierr;
1383521d7252SBarry Smith   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1384273d9f13SBarry Smith   PetscTruth      roworiented=a->roworiented;
1385f15d580aSBarry Smith   const MatScalar *value = v;
1386f15d580aSBarry Smith   MatScalar       *ap,*aa = a->a,*bap;
138792c4ed94SBarry Smith 
13883a40ed3dSBarry Smith   PetscFunctionBegin;
13890e324ae4SSatish Balay   if (roworiented) {
13900e324ae4SSatish Balay     stepval = (n-1)*bs;
13910e324ae4SSatish Balay   } else {
13920e324ae4SSatish Balay     stepval = (m-1)*bs;
13930e324ae4SSatish Balay   }
139492c4ed94SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
139592c4ed94SBarry Smith     row  = im[k];
13965ef9f2a5SBarry Smith     if (row < 0) continue;
13972515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
139877431f27SBarry Smith     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
139992c4ed94SBarry Smith #endif
140092c4ed94SBarry Smith     rp   = aj + ai[row];
140192c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
140292c4ed94SBarry Smith     rmax = imax[row];
140392c4ed94SBarry Smith     nrow = ailen[row];
140492c4ed94SBarry Smith     low  = 0;
1405c71e6ed7SBarry Smith     high = nrow;
140692c4ed94SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
14075ef9f2a5SBarry Smith       if (in[l] < 0) continue;
14082515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
140977431f27SBarry Smith       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
141092c4ed94SBarry Smith #endif
141192c4ed94SBarry Smith       col = in[l];
141292c4ed94SBarry Smith       if (roworiented) {
14130e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
14140e324ae4SSatish Balay       } else {
14150e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
141692c4ed94SBarry Smith       }
1417c71e6ed7SBarry Smith       if (col < lastcol) low = 0; else high = nrow;
1418e2ee6c50SBarry Smith       lastcol = col;
141992c4ed94SBarry Smith       while (high-low > 7) {
142092c4ed94SBarry Smith         t = (low+high)/2;
142192c4ed94SBarry Smith         if (rp[t] > col) high = t;
142292c4ed94SBarry Smith         else             low  = t;
142392c4ed94SBarry Smith       }
142492c4ed94SBarry Smith       for (i=low; i<high; i++) {
142592c4ed94SBarry Smith         if (rp[i] > col) break;
142692c4ed94SBarry Smith         if (rp[i] == col) {
14278a84c255SSatish Balay           bap  = ap +  bs2*i;
14280e324ae4SSatish Balay           if (roworiented) {
14298a84c255SSatish Balay             if (is == ADD_VALUES) {
1430dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1431dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
14328a84c255SSatish Balay                   bap[jj] += *value++;
1433dd9472c6SBarry Smith                 }
1434dd9472c6SBarry Smith               }
14350e324ae4SSatish Balay             } else {
1436dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1437dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
14380e324ae4SSatish Balay                   bap[jj] = *value++;
14398a84c255SSatish Balay                 }
1440dd9472c6SBarry Smith               }
1441dd9472c6SBarry Smith             }
14420e324ae4SSatish Balay           } else {
14430e324ae4SSatish Balay             if (is == ADD_VALUES) {
1444dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1445dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
14460e324ae4SSatish Balay                   *bap++ += *value++;
1447dd9472c6SBarry Smith                 }
1448dd9472c6SBarry Smith               }
14490e324ae4SSatish Balay             } else {
1450dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1451dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
14520e324ae4SSatish Balay                   *bap++  = *value++;
14530e324ae4SSatish Balay                 }
14548a84c255SSatish Balay               }
1455dd9472c6SBarry Smith             }
1456dd9472c6SBarry Smith           }
1457f1241b54SBarry Smith           goto noinsert2;
145892c4ed94SBarry Smith         }
145992c4ed94SBarry Smith       }
146089280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
146177431f27SBarry Smith       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
146292c4ed94SBarry Smith       if (nrow >= rmax) {
146392c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
1464c1ac3661SBarry Smith         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
14653f1db9ecSBarry Smith         MatScalar *new_a;
146692c4ed94SBarry Smith 
146777431f27SBarry Smith         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
146889280ab3SLois Curfman McInnes 
146992c4ed94SBarry Smith         /* malloc new storage space */
1470c1ac3661SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1471b0a32e0cSBarry Smith 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1472690b6cddSBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz);
147392c4ed94SBarry Smith         new_i   = new_j + new_nz;
147492c4ed94SBarry Smith 
147592c4ed94SBarry Smith         /* copy over old data into new slots */
147692c4ed94SBarry Smith         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
147792c4ed94SBarry Smith         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1478c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
147992c4ed94SBarry Smith         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1480c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1481549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1482549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1483549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
148492c4ed94SBarry Smith         /* free up old matrix storage */
1485606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);
1486606d414cSSatish Balay         if (!a->singlemalloc) {
1487606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr);
1488606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);
1489606d414cSSatish Balay         }
149092c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1491c4992f7dSBarry Smith         a->singlemalloc = PETSC_TRUE;
149292c4ed94SBarry Smith 
149392c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
149492c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
149552e6d16bSBarry Smith         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
149692c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
149792c4ed94SBarry Smith         a->reallocs++;
149892c4ed94SBarry Smith         a->nz++;
149992c4ed94SBarry Smith       }
150092c4ed94SBarry Smith       N = nrow++ - 1;
150192c4ed94SBarry Smith       /* shift up all the later entries in this row */
150292c4ed94SBarry Smith       for (ii=N; ii>=i; ii--) {
150392c4ed94SBarry Smith         rp[ii+1] = rp[ii];
1504549d3d68SSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
150592c4ed94SBarry Smith       }
1506549d3d68SSatish Balay       if (N >= i) {
1507549d3d68SSatish Balay         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1508549d3d68SSatish Balay       }
150992c4ed94SBarry Smith       rp[i] = col;
15108a84c255SSatish Balay       bap   = ap +  bs2*i;
15110e324ae4SSatish Balay       if (roworiented) {
1512dd9472c6SBarry Smith         for (ii=0; ii<bs; ii++,value+=stepval) {
1513dd9472c6SBarry Smith           for (jj=ii; jj<bs2; jj+=bs) {
15140e324ae4SSatish Balay             bap[jj] = *value++;
1515dd9472c6SBarry Smith           }
1516dd9472c6SBarry Smith         }
15170e324ae4SSatish Balay       } else {
1518dd9472c6SBarry Smith         for (ii=0; ii<bs; ii++,value+=stepval) {
1519dd9472c6SBarry Smith           for (jj=0; jj<bs; jj++) {
15200e324ae4SSatish Balay             *bap++  = *value++;
15210e324ae4SSatish Balay           }
1522dd9472c6SBarry Smith         }
1523dd9472c6SBarry Smith       }
1524f1241b54SBarry Smith       noinsert2:;
152592c4ed94SBarry Smith       low = i;
152692c4ed94SBarry Smith     }
152792c4ed94SBarry Smith     ailen[row] = nrow;
152892c4ed94SBarry Smith   }
15293a40ed3dSBarry Smith   PetscFunctionReturn(0);
153092c4ed94SBarry Smith }
153126e093fcSHong Zhang 
15324a2ae208SSatish Balay #undef __FUNCT__
15334a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1534dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1535584200bdSSatish Balay {
1536584200bdSSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1537c1ac3661SBarry Smith   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1538c1ac3661SBarry Smith   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
15396849ba73SBarry Smith   PetscErrorCode ierr;
1540c1ac3661SBarry Smith   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
15413f1db9ecSBarry Smith   MatScalar      *aa = a->a,*ap;
15423447b6efSHong Zhang   PetscReal      ratio=0.6;
1543584200bdSSatish Balay 
15443a40ed3dSBarry Smith   PetscFunctionBegin;
15453a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1546584200bdSSatish Balay 
154743ee02c3SBarry Smith   if (m) rmax = ailen[0];
1548584200bdSSatish Balay   for (i=1; i<mbs; i++) {
1549584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
1550584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
1551d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
1552584200bdSSatish Balay     if (fshift) {
1553a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1554584200bdSSatish Balay       N = ailen[i];
1555584200bdSSatish Balay       for (j=0; j<N; j++) {
1556584200bdSSatish Balay         ip[j-fshift] = ip[j];
1557549d3d68SSatish Balay         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1558584200bdSSatish Balay       }
1559584200bdSSatish Balay     }
1560584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
1561584200bdSSatish Balay   }
1562584200bdSSatish Balay   if (mbs) {
1563584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
1564584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1565584200bdSSatish Balay   }
1566584200bdSSatish Balay   /* reset ilen and imax for each row */
1567584200bdSSatish Balay   for (i=0; i<mbs; i++) {
1568584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
1569584200bdSSatish Balay   }
1570a7c10996SSatish Balay   a->nz = ai[mbs];
1571584200bdSSatish Balay 
1572584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
1573b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
1574584200bdSSatish Balay   if (fshift && a->diag) {
1575606d414cSSatish Balay     ierr = PetscFree(a->diag);CHKERRQ(ierr);
157652e6d16bSBarry Smith     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1577584200bdSSatish Balay     a->diag = 0;
1578584200bdSSatish Balay   }
157963ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
158063ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
158163ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1582e2f3b5e9SSatish Balay   a->reallocs          = 0;
15830e6d2581SBarry Smith   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1584cf4441caSHong Zhang 
1585cb5d8e9eSHong Zhang   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
15862f53aa61SHong Zhang   if (a->compressedrow.use){
1587317fbc4cSHong Zhang     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
158873e7a558SHong Zhang   }
158988e51ccdSHong Zhang 
159088e51ccdSHong Zhang   A->same_nonzero = PETSC_TRUE;
15913a40ed3dSBarry Smith   PetscFunctionReturn(0);
1592584200bdSSatish Balay }
1593584200bdSSatish Balay 
1594bea157c4SSatish Balay /*
1595bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
1596bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1597bea157c4SSatish Balay    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1598bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
1599bea157c4SSatish Balay */
16004a2ae208SSatish Balay #undef __FUNCT__
16014a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1602c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1603d9b7c43dSSatish Balay {
1604c1ac3661SBarry Smith   PetscInt   i,j,k,row;
1605bea157c4SSatish Balay   PetscTruth flg;
16063a40ed3dSBarry Smith 
1607433994e6SBarry Smith   PetscFunctionBegin;
1608bea157c4SSatish Balay   for (i=0,j=0; i<n; j++) {
1609bea157c4SSatish Balay     row = idx[i];
1610bea157c4SSatish Balay     if (row%bs!=0) { /* Not the begining of a block */
1611bea157c4SSatish Balay       sizes[j] = 1;
1612bea157c4SSatish Balay       i++;
1613e4fda26cSSatish Balay     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1614bea157c4SSatish Balay       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1615bea157c4SSatish Balay       i++;
1616bea157c4SSatish Balay     } else { /* Begining of the block, so check if the complete block exists */
1617bea157c4SSatish Balay       flg = PETSC_TRUE;
1618bea157c4SSatish Balay       for (k=1; k<bs; k++) {
1619bea157c4SSatish Balay         if (row+k != idx[i+k]) { /* break in the block */
1620bea157c4SSatish Balay           flg = PETSC_FALSE;
1621bea157c4SSatish Balay           break;
1622d9b7c43dSSatish Balay         }
1623bea157c4SSatish Balay       }
1624abc0a331SBarry Smith       if (flg) { /* No break in the bs */
1625bea157c4SSatish Balay         sizes[j] = bs;
1626bea157c4SSatish Balay         i+= bs;
1627bea157c4SSatish Balay       } else {
1628bea157c4SSatish Balay         sizes[j] = 1;
1629bea157c4SSatish Balay         i++;
1630bea157c4SSatish Balay       }
1631bea157c4SSatish Balay     }
1632bea157c4SSatish Balay   }
1633bea157c4SSatish Balay   *bs_max = j;
16343a40ed3dSBarry Smith   PetscFunctionReturn(0);
1635d9b7c43dSSatish Balay }
1636d9b7c43dSSatish Balay 
16374a2ae208SSatish Balay #undef __FUNCT__
16384a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1639dfbe8321SBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1640d9b7c43dSSatish Balay {
1641d9b7c43dSSatish Balay   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1642dfbe8321SBarry Smith   PetscErrorCode ierr;
1643c1ac3661SBarry Smith   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1644521d7252SBarry Smith   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
164587828ca2SBarry Smith   PetscScalar    zero = 0.0;
16463f1db9ecSBarry Smith   MatScalar      *aa;
1647d9b7c43dSSatish Balay 
16483a40ed3dSBarry Smith   PetscFunctionBegin;
1649d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1650b9b97703SBarry Smith   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1651d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1652d9b7c43dSSatish Balay 
1653bea157c4SSatish Balay   /* allocate memory for rows,sizes */
1654c1ac3661SBarry Smith   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1655bea157c4SSatish Balay   sizes = rows + is_n;
1656bea157c4SSatish Balay 
1657563b5814SBarry Smith   /* copy IS values to rows, and sort them */
1658bea157c4SSatish Balay   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1659bea157c4SSatish Balay   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1660dffd3267SBarry Smith   if (baij->keepzeroedrows) {
1661dffd3267SBarry Smith     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1662dffd3267SBarry Smith     bs_max = is_n;
166388e51ccdSHong Zhang     A->same_nonzero = PETSC_TRUE;
1664dffd3267SBarry Smith   } else {
1665bea157c4SSatish Balay     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
166688e51ccdSHong Zhang     A->same_nonzero = PETSC_FALSE;
1667dffd3267SBarry Smith   }
1668b97a56abSSatish Balay   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1669bea157c4SSatish Balay 
1670bea157c4SSatish Balay   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1671bea157c4SSatish Balay     row   = rows[j];
167277431f27SBarry Smith     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1673bea157c4SSatish Balay     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1674bea157c4SSatish Balay     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1675dffd3267SBarry Smith     if (sizes[i] == bs && !baij->keepzeroedrows) {
1676bea157c4SSatish Balay       if (diag) {
1677bea157c4SSatish Balay         if (baij->ilen[row/bs] > 0) {
1678bea157c4SSatish Balay           baij->ilen[row/bs]       = 1;
1679bea157c4SSatish Balay           baij->j[baij->i[row/bs]] = row/bs;
1680bea157c4SSatish Balay           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1681a07cd24cSSatish Balay         }
1682563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
1683bea157c4SSatish Balay         for (k=0; k<bs; k++) {
1684bea157c4SSatish Balay           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1685bea157c4SSatish Balay         }
1686bea157c4SSatish Balay       } else { /* (!diag) */
1687bea157c4SSatish Balay         baij->ilen[row/bs] = 0;
1688bea157c4SSatish Balay       } /* end (!diag) */
1689bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
1690aa482453SBarry Smith #if defined (PETSC_USE_DEBUG)
1691634064b4SBarry Smith       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1692bea157c4SSatish Balay #endif
1693bea157c4SSatish Balay       for (k=0; k<count; k++) {
1694d9b7c43dSSatish Balay         aa[0] =  zero;
1695d9b7c43dSSatish Balay         aa    += bs;
1696d9b7c43dSSatish Balay       }
1697d9b7c43dSSatish Balay       if (diag) {
1698f830108cSBarry Smith         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1699d9b7c43dSSatish Balay       }
1700d9b7c43dSSatish Balay     }
1701bea157c4SSatish Balay   }
1702bea157c4SSatish Balay 
1703606d414cSSatish Balay   ierr = PetscFree(rows);CHKERRQ(ierr);
17049a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17053a40ed3dSBarry Smith   PetscFunctionReturn(0);
1706d9b7c43dSSatish Balay }
17071c351548SSatish Balay 
17084a2ae208SSatish Balay #undef __FUNCT__
17094a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ"
1710c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
17112d61bbb3SSatish Balay {
17122d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1713e2ee6c50SBarry Smith   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1714c1ac3661SBarry Smith   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1715521d7252SBarry Smith   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
17166849ba73SBarry Smith   PetscErrorCode ierr;
1717c1ac3661SBarry Smith   PetscInt       ridx,cidx,bs2=a->bs2;
1718273d9f13SBarry Smith   PetscTruth     roworiented=a->roworiented;
17193f1db9ecSBarry Smith   MatScalar      *ap,value,*aa=a->a,*bap;
17202d61bbb3SSatish Balay 
17212d61bbb3SSatish Balay   PetscFunctionBegin;
17222d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over added rows */
17232d61bbb3SSatish Balay     row  = im[k]; brow = row/bs;
17245ef9f2a5SBarry Smith     if (row < 0) continue;
17252515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
172677431f27SBarry Smith     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
17272d61bbb3SSatish Balay #endif
17282d61bbb3SSatish Balay     rp   = aj + ai[brow];
17292d61bbb3SSatish Balay     ap   = aa + bs2*ai[brow];
17302d61bbb3SSatish Balay     rmax = imax[brow];
17312d61bbb3SSatish Balay     nrow = ailen[brow];
17322d61bbb3SSatish Balay     low  = 0;
1733c71e6ed7SBarry Smith     high = nrow;
17342d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over added columns */
17355ef9f2a5SBarry Smith       if (in[l] < 0) continue;
17362515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
173777431f27SBarry Smith       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
17382d61bbb3SSatish Balay #endif
17392d61bbb3SSatish Balay       col = in[l]; bcol = col/bs;
17402d61bbb3SSatish Balay       ridx = row % bs; cidx = col % bs;
17412d61bbb3SSatish Balay       if (roworiented) {
17425ef9f2a5SBarry Smith         value = v[l + k*n];
17432d61bbb3SSatish Balay       } else {
17442d61bbb3SSatish Balay         value = v[k + l*m];
17452d61bbb3SSatish Balay       }
1746c71e6ed7SBarry Smith       if (col < lastcol) low = 0; else high = nrow;
1747e2ee6c50SBarry Smith       lastcol = col;
17482d61bbb3SSatish Balay       while (high-low > 7) {
17492d61bbb3SSatish Balay         t = (low+high)/2;
17502d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
17512d61bbb3SSatish Balay         else              low  = t;
17522d61bbb3SSatish Balay       }
17532d61bbb3SSatish Balay       for (i=low; i<high; i++) {
17542d61bbb3SSatish Balay         if (rp[i] > bcol) break;
17552d61bbb3SSatish Balay         if (rp[i] == bcol) {
17562d61bbb3SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
17572d61bbb3SSatish Balay           if (is == ADD_VALUES) *bap += value;
17582d61bbb3SSatish Balay           else                  *bap  = value;
17592d61bbb3SSatish Balay           goto noinsert1;
17602d61bbb3SSatish Balay         }
17612d61bbb3SSatish Balay       }
17622d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
176377431f27SBarry Smith       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
17642d61bbb3SSatish Balay       if (nrow >= rmax) {
17652d61bbb3SSatish Balay         /* there is no extra room in row, therefore enlarge */
1766c1ac3661SBarry Smith         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
17673f1db9ecSBarry Smith         MatScalar *new_a;
17682d61bbb3SSatish Balay 
176977431f27SBarry Smith         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
17702d61bbb3SSatish Balay 
17712d61bbb3SSatish Balay         /* Malloc new storage space */
1772c1ac3661SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1773b0a32e0cSBarry Smith 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1774c1ac3661SBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz);
17752d61bbb3SSatish Balay         new_i   = new_j + new_nz;
17762d61bbb3SSatish Balay 
17772d61bbb3SSatish Balay         /* copy over old data into new slots */
17782d61bbb3SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
17792d61bbb3SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1780c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
17812d61bbb3SSatish Balay         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1782c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1783549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1784549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1785549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
17862d61bbb3SSatish Balay         /* free up old matrix storage */
1787606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);
1788606d414cSSatish Balay         if (!a->singlemalloc) {
1789606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr);
1790606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);
1791606d414cSSatish Balay         }
17922d61bbb3SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1793c4992f7dSBarry Smith         a->singlemalloc = PETSC_TRUE;
17942d61bbb3SSatish Balay 
17952d61bbb3SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
17962d61bbb3SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
179752e6d16bSBarry Smith         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
17982d61bbb3SSatish Balay         a->maxnz += bs2*CHUNKSIZE;
17992d61bbb3SSatish Balay         a->reallocs++;
18002d61bbb3SSatish Balay         a->nz++;
18012d61bbb3SSatish Balay       }
18022d61bbb3SSatish Balay       N = nrow++ - 1;
18032d61bbb3SSatish Balay       /* shift up all the later entries in this row */
18042d61bbb3SSatish Balay       for (ii=N; ii>=i; ii--) {
18052d61bbb3SSatish Balay         rp[ii+1] = rp[ii];
1806549d3d68SSatish Balay         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
18072d61bbb3SSatish Balay       }
1808549d3d68SSatish Balay       if (N>=i) {
1809549d3d68SSatish Balay         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1810549d3d68SSatish Balay       }
18112d61bbb3SSatish Balay       rp[i]                      = bcol;
18122d61bbb3SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
18132d61bbb3SSatish Balay       noinsert1:;
18142d61bbb3SSatish Balay       low = i;
18152d61bbb3SSatish Balay     }
18162d61bbb3SSatish Balay     ailen[brow] = nrow;
18172d61bbb3SSatish Balay   }
181888e51ccdSHong Zhang   A->same_nonzero = PETSC_FALSE;
18192d61bbb3SSatish Balay   PetscFunctionReturn(0);
18202d61bbb3SSatish Balay }
18212d61bbb3SSatish Balay 
18222d61bbb3SSatish Balay 
18234a2ae208SSatish Balay #undef __FUNCT__
18244a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1825dfbe8321SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
18262d61bbb3SSatish Balay {
18272d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
18282d61bbb3SSatish Balay   Mat            outA;
1829dfbe8321SBarry Smith   PetscErrorCode ierr;
1830667159a5SBarry Smith   PetscTruth     row_identity,col_identity;
18312d61bbb3SSatish Balay 
18322d61bbb3SSatish Balay   PetscFunctionBegin;
1833d3d32019SBarry Smith   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1834667159a5SBarry Smith   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1835667159a5SBarry Smith   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1836667159a5SBarry Smith   if (!row_identity || !col_identity) {
1837634064b4SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1838667159a5SBarry Smith   }
18392d61bbb3SSatish Balay 
18402d61bbb3SSatish Balay   outA          = inA;
18412d61bbb3SSatish Balay   inA->factor   = FACTOR_LU;
18422d61bbb3SSatish Balay 
18432d61bbb3SSatish Balay   if (!a->diag) {
1844c4992f7dSBarry Smith     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
18452d61bbb3SSatish Balay   }
1846cf242676SKris Buschelman 
1847c38d4ed2SBarry Smith   a->row        = row;
1848c38d4ed2SBarry Smith   a->col        = col;
1849c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1850c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1851c38d4ed2SBarry Smith 
1852c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
18534c49b128SBarry Smith   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
185452e6d16bSBarry Smith   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1855c38d4ed2SBarry Smith 
1856cf242676SKris Buschelman   /*
1857cf242676SKris Buschelman       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1858cf242676SKris Buschelman       for ILU(0) factorization with natural ordering
1859cf242676SKris Buschelman   */
1860521d7252SBarry Smith   if (inA->bs < 8) {
1861cf242676SKris Buschelman     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1862cf242676SKris Buschelman   } else {
1863c38d4ed2SBarry Smith     if (!a->solve_work) {
1864521d7252SBarry Smith       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
186552e6d16bSBarry Smith       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1866c38d4ed2SBarry Smith     }
18672d61bbb3SSatish Balay   }
18682d61bbb3SSatish Balay 
1869af281ebdSHong Zhang   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1870667159a5SBarry Smith 
18712d61bbb3SSatish Balay   PetscFunctionReturn(0);
18722d61bbb3SSatish Balay }
18734a2ae208SSatish Balay #undef __FUNCT__
18744a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1875dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1876ba4ca20aSSatish Balay {
1877c38d4ed2SBarry Smith   static PetscTruth called = PETSC_FALSE;
1878ba4ca20aSSatish Balay   MPI_Comm          comm = A->comm;
1879dfbe8321SBarry Smith   PetscErrorCode    ierr;
1880ba4ca20aSSatish Balay 
18813a40ed3dSBarry Smith   PetscFunctionBegin;
1882c38d4ed2SBarry Smith   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1883d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1884d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
18853a40ed3dSBarry Smith   PetscFunctionReturn(0);
1886ba4ca20aSSatish Balay }
1887d9b7c43dSSatish Balay 
1888fb2e594dSBarry Smith EXTERN_C_BEGIN
18894a2ae208SSatish Balay #undef __FUNCT__
18904a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1891c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
189227a8da17SBarry Smith {
189327a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1894c1ac3661SBarry Smith   PetscInt    i,nz,nbs;
189527a8da17SBarry Smith 
189627a8da17SBarry Smith   PetscFunctionBegin;
189714db4f74SSatish Balay   nz  = baij->maxnz/baij->bs2;
189814db4f74SSatish Balay   nbs = baij->nbs;
189927a8da17SBarry Smith   for (i=0; i<nz; i++) {
190027a8da17SBarry Smith     baij->j[i] = indices[i];
190127a8da17SBarry Smith   }
190227a8da17SBarry Smith   baij->nz = nz;
190314db4f74SSatish Balay   for (i=0; i<nbs; i++) {
190427a8da17SBarry Smith     baij->ilen[i] = baij->imax[i];
190527a8da17SBarry Smith   }
190627a8da17SBarry Smith 
190727a8da17SBarry Smith   PetscFunctionReturn(0);
190827a8da17SBarry Smith }
1909fb2e594dSBarry Smith EXTERN_C_END
191027a8da17SBarry Smith 
19114a2ae208SSatish Balay #undef __FUNCT__
19124a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
191327a8da17SBarry Smith /*@
191427a8da17SBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
191527a8da17SBarry Smith        in the matrix.
191627a8da17SBarry Smith 
191727a8da17SBarry Smith   Input Parameters:
191827a8da17SBarry Smith +  mat - the SeqBAIJ matrix
191927a8da17SBarry Smith -  indices - the column indices
192027a8da17SBarry Smith 
192115091d37SBarry Smith   Level: advanced
192215091d37SBarry Smith 
192327a8da17SBarry Smith   Notes:
192427a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
192527a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
192627a8da17SBarry Smith   of the MatSetValues() operation.
192727a8da17SBarry Smith 
192827a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
1929d1be2dadSMatthew Knepley   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
193027a8da17SBarry Smith 
193127a8da17SBarry Smith     MUST be called before any calls to MatSetValues();
193227a8da17SBarry Smith 
193327a8da17SBarry Smith @*/
1934c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
193527a8da17SBarry Smith {
1936c1ac3661SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
193727a8da17SBarry Smith 
193827a8da17SBarry Smith   PetscFunctionBegin;
19394482741eSBarry Smith   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
19404482741eSBarry Smith   PetscValidPointer(indices,2);
1941c134de8dSSatish Balay   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
194227a8da17SBarry Smith   if (f) {
194327a8da17SBarry Smith     ierr = (*f)(mat,indices);CHKERRQ(ierr);
194427a8da17SBarry Smith   } else {
1945634064b4SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
194627a8da17SBarry Smith   }
194727a8da17SBarry Smith   PetscFunctionReturn(0);
194827a8da17SBarry Smith }
194927a8da17SBarry Smith 
19504a2ae208SSatish Balay #undef __FUNCT__
19514a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1952dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1953273d9f13SBarry Smith {
1954273d9f13SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1955dfbe8321SBarry Smith   PetscErrorCode ierr;
1956c1ac3661SBarry Smith   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1957273d9f13SBarry Smith   PetscReal      atmp;
195887828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
1959273d9f13SBarry Smith   MatScalar      *aa;
1960c1ac3661SBarry Smith   PetscInt       ncols,brow,krow,kcol;
1961273d9f13SBarry Smith 
1962273d9f13SBarry Smith   PetscFunctionBegin;
1963273d9f13SBarry Smith   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1964521d7252SBarry Smith   bs   = A->bs;
1965273d9f13SBarry Smith   aa   = a->a;
1966273d9f13SBarry Smith   ai   = a->i;
1967273d9f13SBarry Smith   aj   = a->j;
1968273d9f13SBarry Smith   mbs = a->mbs;
1969273d9f13SBarry Smith 
1970273d9f13SBarry Smith   ierr = VecSet(&zero,v);CHKERRQ(ierr);
19711ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1972273d9f13SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1973273d9f13SBarry Smith   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1974273d9f13SBarry Smith   for (i=0; i<mbs; i++) {
1975273d9f13SBarry Smith     ncols = ai[1] - ai[0]; ai++;
1976273d9f13SBarry Smith     brow  = bs*i;
1977273d9f13SBarry Smith     for (j=0; j<ncols; j++){
1978273d9f13SBarry Smith       /* bcol = bs*(*aj); */
1979273d9f13SBarry Smith       for (kcol=0; kcol<bs; kcol++){
1980273d9f13SBarry Smith         for (krow=0; krow<bs; krow++){
1981273d9f13SBarry Smith           atmp = PetscAbsScalar(*aa); aa++;
1982273d9f13SBarry Smith           row = brow + krow;    /* row index */
1983273d9f13SBarry Smith           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1984273d9f13SBarry Smith           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1985273d9f13SBarry Smith         }
1986273d9f13SBarry Smith       }
1987273d9f13SBarry Smith       aj++;
1988273d9f13SBarry Smith     }
1989273d9f13SBarry Smith   }
19901ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1991273d9f13SBarry Smith   PetscFunctionReturn(0);
1992273d9f13SBarry Smith }
1993273d9f13SBarry Smith 
19944a2ae208SSatish Balay #undef __FUNCT__
19954a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1996dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1997273d9f13SBarry Smith {
1998dfbe8321SBarry Smith   PetscErrorCode ierr;
1999273d9f13SBarry Smith 
2000273d9f13SBarry Smith   PetscFunctionBegin;
2001273d9f13SBarry Smith   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
2002273d9f13SBarry Smith   PetscFunctionReturn(0);
2003273d9f13SBarry Smith }
2004273d9f13SBarry Smith 
20054a2ae208SSatish Balay #undef __FUNCT__
20064a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ"
2007dfbe8321SBarry Smith PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2008f2a5309cSSatish Balay {
2009f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2010f2a5309cSSatish Balay   PetscFunctionBegin;
2011f2a5309cSSatish Balay   *array = a->a;
2012f2a5309cSSatish Balay   PetscFunctionReturn(0);
2013f2a5309cSSatish Balay }
2014f2a5309cSSatish Balay 
20154a2ae208SSatish Balay #undef __FUNCT__
20164a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2017dfbe8321SBarry Smith PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2018f2a5309cSSatish Balay {
2019f2a5309cSSatish Balay   PetscFunctionBegin;
2020f2a5309cSSatish Balay   PetscFunctionReturn(0);
2021f2a5309cSSatish Balay }
2022f2a5309cSSatish Balay 
202342ee4b1aSHong Zhang #include "petscblaslapack.h"
202442ee4b1aSHong Zhang #undef __FUNCT__
202542ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ"
2026dfbe8321SBarry Smith PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
202742ee4b1aSHong Zhang {
202842ee4b1aSHong Zhang   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2029dfbe8321SBarry Smith   PetscErrorCode ierr;
2030521d7252SBarry Smith   PetscInt       i,bs=Y->bs,j,bs2;
20314ce68768SBarry Smith   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
203242ee4b1aSHong Zhang 
203342ee4b1aSHong Zhang   PetscFunctionBegin;
203442ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
203571044d3cSBarry Smith     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2036c537a176SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2037c4319e64SHong Zhang     if (y->xtoy && y->XtoY != X) {
2038c4319e64SHong Zhang       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2039c4319e64SHong Zhang       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2040c537a176SHong Zhang     }
2041c4319e64SHong Zhang     if (!y->xtoy) { /* get xtoy */
2042c4319e64SHong Zhang       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2043c4319e64SHong Zhang       y->XtoY = X;
2044c537a176SHong Zhang     }
2045c4319e64SHong Zhang     bs2 = bs*bs;
2046c537a176SHong Zhang     for (i=0; i<x->nz; i++) {
2047c4319e64SHong Zhang       j = 0;
2048c4319e64SHong Zhang       while (j < bs2){
20496550863bSHong Zhang         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2050c4319e64SHong Zhang         j++;
2051c537a176SHong Zhang       }
2052c4319e64SHong Zhang     }
205363ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
205442ee4b1aSHong Zhang   } else {
205542ee4b1aSHong Zhang     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
205642ee4b1aSHong Zhang   }
205742ee4b1aSHong Zhang   PetscFunctionReturn(0);
205842ee4b1aSHong Zhang }
205942ee4b1aSHong Zhang 
20602593348eSBarry Smith /* -------------------------------------------------------------------*/
2061cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2062cc2dc46cSBarry Smith        MatGetRow_SeqBAIJ,
2063cc2dc46cSBarry Smith        MatRestoreRow_SeqBAIJ,
2064cc2dc46cSBarry Smith        MatMult_SeqBAIJ_N,
206597304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N,
20667c922b88SBarry Smith        MatMultTranspose_SeqBAIJ,
20677c922b88SBarry Smith        MatMultTransposeAdd_SeqBAIJ,
2068cc2dc46cSBarry Smith        MatSolve_SeqBAIJ_N,
2069cc2dc46cSBarry Smith        0,
2070cc2dc46cSBarry Smith        0,
207197304618SKris Buschelman /*10*/ 0,
2072cc2dc46cSBarry Smith        MatLUFactor_SeqBAIJ,
2073cc2dc46cSBarry Smith        0,
2074b6490206SBarry Smith        0,
2075f2501298SSatish Balay        MatTranspose_SeqBAIJ,
207697304618SKris Buschelman /*15*/ MatGetInfo_SeqBAIJ,
2077cc2dc46cSBarry Smith        MatEqual_SeqBAIJ,
2078cc2dc46cSBarry Smith        MatGetDiagonal_SeqBAIJ,
2079cc2dc46cSBarry Smith        MatDiagonalScale_SeqBAIJ,
2080cc2dc46cSBarry Smith        MatNorm_SeqBAIJ,
208197304618SKris Buschelman /*20*/ 0,
2082cc2dc46cSBarry Smith        MatAssemblyEnd_SeqBAIJ,
2083cc2dc46cSBarry Smith        0,
2084cc2dc46cSBarry Smith        MatSetOption_SeqBAIJ,
2085cc2dc46cSBarry Smith        MatZeroEntries_SeqBAIJ,
208697304618SKris Buschelman /*25*/ MatZeroRows_SeqBAIJ,
2087cc2dc46cSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,
2088cc2dc46cSBarry Smith        MatLUFactorNumeric_SeqBAIJ_N,
2089c05c3958SHong Zhang        MatCholeskyFactorSymbolic_SeqBAIJ,
2090c05c3958SHong Zhang        MatCholeskyFactorNumeric_SeqBAIJ_N,
209197304618SKris Buschelman /*30*/ MatSetUpPreallocation_SeqBAIJ,
2092cc2dc46cSBarry Smith        MatILUFactorSymbolic_SeqBAIJ,
2093c05c3958SHong Zhang        MatICCFactorSymbolic_SeqBAIJ,
2094f2a5309cSSatish Balay        MatGetArray_SeqBAIJ,
2095f2a5309cSSatish Balay        MatRestoreArray_SeqBAIJ,
209697304618SKris Buschelman /*35*/ MatDuplicate_SeqBAIJ,
2097cc2dc46cSBarry Smith        0,
2098cc2dc46cSBarry Smith        0,
2099cc2dc46cSBarry Smith        MatILUFactor_SeqBAIJ,
2100cc2dc46cSBarry Smith        0,
210197304618SKris Buschelman /*40*/ MatAXPY_SeqBAIJ,
2102cc2dc46cSBarry Smith        MatGetSubMatrices_SeqBAIJ,
2103cc2dc46cSBarry Smith        MatIncreaseOverlap_SeqBAIJ,
2104cc2dc46cSBarry Smith        MatGetValues_SeqBAIJ,
2105cc2dc46cSBarry Smith        0,
210697304618SKris Buschelman /*45*/ MatPrintHelp_SeqBAIJ,
2107cc2dc46cSBarry Smith        MatScale_SeqBAIJ,
2108cc2dc46cSBarry Smith        0,
2109cc2dc46cSBarry Smith        0,
2110cc2dc46cSBarry Smith        0,
2111521d7252SBarry Smith /*50*/ 0,
21123b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
211392c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
2114cc2dc46cSBarry Smith        0,
2115cc2dc46cSBarry Smith        0,
211697304618SKris Buschelman /*55*/ 0,
2117cc2dc46cSBarry Smith        0,
2118cc2dc46cSBarry Smith        0,
2119cc2dc46cSBarry Smith        0,
2120d3825aa8SBarry Smith        MatSetValuesBlocked_SeqBAIJ,
212197304618SKris Buschelman /*60*/ MatGetSubMatrix_SeqBAIJ,
2122b9b97703SBarry Smith        MatDestroy_SeqBAIJ,
2123b9b97703SBarry Smith        MatView_SeqBAIJ,
21243a64cc32SBarry Smith        MatGetPetscMaps_Petsc,
2125273d9f13SBarry Smith        0,
212697304618SKris Buschelman /*65*/ 0,
2127273d9f13SBarry Smith        0,
2128273d9f13SBarry Smith        0,
2129273d9f13SBarry Smith        0,
2130273d9f13SBarry Smith        0,
213197304618SKris Buschelman /*70*/ MatGetRowMax_SeqBAIJ,
213297304618SKris Buschelman        MatConvert_Basic,
2133273d9f13SBarry Smith        0,
213497304618SKris Buschelman        0,
213597304618SKris Buschelman        0,
213697304618SKris Buschelman /*75*/ 0,
213797304618SKris Buschelman        0,
213897304618SKris Buschelman        0,
213997304618SKris Buschelman        0,
214097304618SKris Buschelman        0,
214197304618SKris Buschelman /*80*/ 0,
214297304618SKris Buschelman        0,
214397304618SKris Buschelman        0,
214497304618SKris Buschelman        0,
2145865e5f61SKris Buschelman        MatLoad_SeqBAIJ,
2146865e5f61SKris Buschelman /*85*/ 0,
2147b01c7715SBarry Smith        0,
2148b01c7715SBarry Smith        0,
2149b01c7715SBarry Smith        0,
2150865e5f61SKris Buschelman        0,
2151865e5f61SKris Buschelman /*90*/ 0,
2152865e5f61SKris Buschelman        0,
2153865e5f61SKris Buschelman        0,
2154865e5f61SKris Buschelman        0,
2155865e5f61SKris Buschelman        0,
2156865e5f61SKris Buschelman /*95*/ 0,
2157865e5f61SKris Buschelman        0,
2158865e5f61SKris Buschelman        0,
2159865e5f61SKris Buschelman        0};
21602593348eSBarry Smith 
21613e90b805SBarry Smith EXTERN_C_BEGIN
21624a2ae208SSatish Balay #undef __FUNCT__
21634a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2164dfbe8321SBarry Smith PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
21653e90b805SBarry Smith {
21663e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2167521d7252SBarry Smith   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2168dfbe8321SBarry Smith   PetscErrorCode ierr;
21693e90b805SBarry Smith 
21703e90b805SBarry Smith   PetscFunctionBegin;
21713e90b805SBarry Smith   if (aij->nonew != 1) {
2172634064b4SBarry Smith     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
21733e90b805SBarry Smith   }
21743e90b805SBarry Smith 
21753e90b805SBarry Smith   /* allocate space for values if not already there */
21763e90b805SBarry Smith   if (!aij->saved_values) {
217787828ca2SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
21783e90b805SBarry Smith   }
21793e90b805SBarry Smith 
21803e90b805SBarry Smith   /* copy values over */
218187828ca2SBarry Smith   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
21823e90b805SBarry Smith   PetscFunctionReturn(0);
21833e90b805SBarry Smith }
21843e90b805SBarry Smith EXTERN_C_END
21853e90b805SBarry Smith 
21863e90b805SBarry Smith EXTERN_C_BEGIN
21874a2ae208SSatish Balay #undef __FUNCT__
21884a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2189dfbe8321SBarry Smith PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
21903e90b805SBarry Smith {
21913e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
21926849ba73SBarry Smith   PetscErrorCode ierr;
2193521d7252SBarry Smith   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
21943e90b805SBarry Smith 
21953e90b805SBarry Smith   PetscFunctionBegin;
21963e90b805SBarry Smith   if (aij->nonew != 1) {
2197634064b4SBarry Smith     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
21983e90b805SBarry Smith   }
21993e90b805SBarry Smith   if (!aij->saved_values) {
2200634064b4SBarry Smith     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
22013e90b805SBarry Smith   }
22023e90b805SBarry Smith 
22033e90b805SBarry Smith   /* copy values over */
220487828ca2SBarry Smith   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
22053e90b805SBarry Smith   PetscFunctionReturn(0);
22063e90b805SBarry Smith }
22073e90b805SBarry Smith EXTERN_C_END
22083e90b805SBarry Smith 
2209273d9f13SBarry Smith EXTERN_C_BEGIN
2210ceb03754SKris Buschelman extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2211ceb03754SKris Buschelman extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2212273d9f13SBarry Smith EXTERN_C_END
2213273d9f13SBarry Smith 
2214273d9f13SBarry Smith EXTERN_C_BEGIN
22154a2ae208SSatish Balay #undef __FUNCT__
2216a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2217c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2218a23d5eceSKris Buschelman {
2219a23d5eceSKris Buschelman   Mat_SeqBAIJ    *b;
22206849ba73SBarry Smith   PetscErrorCode ierr;
2221c1ac3661SBarry Smith   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2222a23d5eceSKris Buschelman   PetscTruth     flg;
2223a23d5eceSKris Buschelman 
2224a23d5eceSKris Buschelman   PetscFunctionBegin;
2225a23d5eceSKris Buschelman 
2226a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
2227a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2228a23d5eceSKris Buschelman   if (nnz && newbs != bs) {
2229634064b4SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2230a23d5eceSKris Buschelman   }
2231a23d5eceSKris Buschelman   bs   = newbs;
2232a23d5eceSKris Buschelman 
2233a23d5eceSKris Buschelman   mbs  = B->m/bs;
2234a23d5eceSKris Buschelman   nbs  = B->n/bs;
2235a23d5eceSKris Buschelman   bs2  = bs*bs;
2236a23d5eceSKris Buschelman 
2237a23d5eceSKris Buschelman   if (mbs*bs!=B->m || nbs*bs!=B->n) {
223877431f27SBarry Smith     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2239a23d5eceSKris Buschelman   }
2240a23d5eceSKris Buschelman 
2241a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
224277431f27SBarry Smith   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2243a23d5eceSKris Buschelman   if (nnz) {
2244a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {
224577431f27SBarry Smith       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
224677431f27SBarry Smith       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2247a23d5eceSKris Buschelman     }
2248a23d5eceSKris Buschelman   }
2249a23d5eceSKris Buschelman 
2250a23d5eceSKris Buschelman   b       = (Mat_SeqBAIJ*)B->data;
2251a23d5eceSKris Buschelman   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2252a23d5eceSKris Buschelman   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2253a23d5eceSKris Buschelman   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2254a23d5eceSKris Buschelman   if (!flg) {
2255a23d5eceSKris Buschelman     switch (bs) {
2256a23d5eceSKris Buschelman     case 1:
2257a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2258a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_1;
2259a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2260a23d5eceSKris Buschelman       break;
2261a23d5eceSKris Buschelman     case 2:
2262a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2263a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_2;
2264a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2265b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2266a23d5eceSKris Buschelman       break;
2267a23d5eceSKris Buschelman     case 3:
2268a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2269a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_3;
2270a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2271b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2272a23d5eceSKris Buschelman       break;
2273a23d5eceSKris Buschelman     case 4:
2274a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2275a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_4;
2276a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2277b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2278a23d5eceSKris Buschelman       break;
2279a23d5eceSKris Buschelman     case 5:
2280a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2281a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_5;
2282a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2283b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2284a23d5eceSKris Buschelman       break;
2285a23d5eceSKris Buschelman     case 6:
2286a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2287a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_6;
2288a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2289a23d5eceSKris Buschelman       break;
2290a23d5eceSKris Buschelman     case 7:
2291a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2292a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_7;
2293a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2294a23d5eceSKris Buschelman       break;
2295a23d5eceSKris Buschelman     default:
2296a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2297a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_N;
2298a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2299a23d5eceSKris Buschelman       break;
2300a23d5eceSKris Buschelman     }
2301a23d5eceSKris Buschelman   }
2302521d7252SBarry Smith   B->bs      = bs;
2303a23d5eceSKris Buschelman   b->mbs     = mbs;
2304a23d5eceSKris Buschelman   b->nbs     = nbs;
2305c1ac3661SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2306a23d5eceSKris Buschelman   if (!nnz) {
2307a23d5eceSKris Buschelman     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2308a23d5eceSKris Buschelman     else if (nz <= 0)        nz = 1;
2309a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) b->imax[i] = nz;
2310a23d5eceSKris Buschelman     nz = nz*mbs;
2311a23d5eceSKris Buschelman   } else {
2312a23d5eceSKris Buschelman     nz = 0;
2313a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2314a23d5eceSKris Buschelman   }
2315a23d5eceSKris Buschelman 
2316a23d5eceSKris Buschelman   /* allocate the matrix space */
2317c1ac3661SBarry Smith   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2318a23d5eceSKris Buschelman   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2319a23d5eceSKris Buschelman   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2320c1ac3661SBarry Smith   b->j  = (PetscInt*)(b->a + nz*bs2);
2321c1ac3661SBarry Smith   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2322a23d5eceSKris Buschelman   b->i  = b->j + nz;
2323a23d5eceSKris Buschelman   b->singlemalloc = PETSC_TRUE;
2324a23d5eceSKris Buschelman 
2325a23d5eceSKris Buschelman   b->i[0] = 0;
2326a23d5eceSKris Buschelman   for (i=1; i<mbs+1; i++) {
2327a23d5eceSKris Buschelman     b->i[i] = b->i[i-1] + b->imax[i-1];
2328a23d5eceSKris Buschelman   }
2329a23d5eceSKris Buschelman 
2330a23d5eceSKris Buschelman   /* b->ilen will count nonzeros in each block row so far. */
2331c1ac3661SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
233252e6d16bSBarry Smith   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2333a23d5eceSKris Buschelman   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2334a23d5eceSKris Buschelman 
2335521d7252SBarry Smith   B->bs               = bs;
2336a23d5eceSKris Buschelman   b->bs2              = bs2;
2337a23d5eceSKris Buschelman   b->mbs              = mbs;
2338a23d5eceSKris Buschelman   b->nz               = 0;
2339a23d5eceSKris Buschelman   b->maxnz            = nz*bs2;
2340a23d5eceSKris Buschelman   B->info.nz_unneeded = (PetscReal)b->maxnz;
2341a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2342a23d5eceSKris Buschelman }
2343a23d5eceSKris Buschelman EXTERN_C_END
2344a23d5eceSKris Buschelman 
23450bad9183SKris Buschelman /*MC
2346fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
23470bad9183SKris Buschelman    block sparse compressed row format.
23480bad9183SKris Buschelman 
23490bad9183SKris Buschelman    Options Database Keys:
23500bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
23510bad9183SKris Buschelman 
23520bad9183SKris Buschelman   Level: beginner
23530bad9183SKris Buschelman 
23540bad9183SKris Buschelman .seealso: MatCreateSeqBAIJ
23550bad9183SKris Buschelman M*/
23560bad9183SKris Buschelman 
2357a23d5eceSKris Buschelman EXTERN_C_BEGIN
2358a23d5eceSKris Buschelman #undef __FUNCT__
23594a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ"
2360dfbe8321SBarry Smith PetscErrorCode MatCreate_SeqBAIJ(Mat B)
23612593348eSBarry Smith {
2362dfbe8321SBarry Smith   PetscErrorCode ierr;
2363c1ac3661SBarry Smith   PetscMPIInt    size;
2364b6490206SBarry Smith   Mat_SeqBAIJ    *b;
23653b2fbd54SBarry Smith 
23663a40ed3dSBarry Smith   PetscFunctionBegin;
2367273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
236829bbc08cSBarry Smith   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2369b6490206SBarry Smith 
2370273d9f13SBarry Smith   B->m = B->M = PetscMax(B->m,B->M);
2371273d9f13SBarry Smith   B->n = B->N = PetscMax(B->n,B->N);
2372b0a32e0cSBarry Smith   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2373b0a32e0cSBarry Smith   B->data = (void*)b;
2374549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
23752593348eSBarry Smith   B->factor           = 0;
237690f02eecSBarry Smith   B->mapping          = 0;
23772593348eSBarry Smith   b->row              = 0;
23782593348eSBarry Smith   b->col              = 0;
2379e51c0b9cSSatish Balay   b->icol             = 0;
23802593348eSBarry Smith   b->reallocs         = 0;
23813e90b805SBarry Smith   b->saved_values     = 0;
2382cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2383563b5814SBarry Smith   b->setvalueslen     = 0;
2384563b5814SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2385563b5814SBarry Smith #endif
23862593348eSBarry Smith 
23873a64cc32SBarry Smith   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
23883a64cc32SBarry Smith   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2389a5ae1ecdSBarry Smith 
2390c4992f7dSBarry Smith   b->sorted           = PETSC_FALSE;
2391c4992f7dSBarry Smith   b->roworiented      = PETSC_TRUE;
23922593348eSBarry Smith   b->nonew            = 0;
23932593348eSBarry Smith   b->diag             = 0;
23942593348eSBarry Smith   b->solve_work       = 0;
2395de6a44a3SBarry Smith   b->mult_work        = 0;
23962a1b7f2aSHong Zhang   B->spptr            = 0;
23970e6d2581SBarry Smith   B->info.nz_unneeded = (PetscReal)b->maxnz;
2398883fce79SBarry Smith   b->keepzeroedrows   = PETSC_FALSE;
2399c4319e64SHong Zhang   b->xtoy              = 0;
2400c4319e64SHong Zhang   b->XtoY              = 0;
240173e7a558SHong Zhang   b->compressedrow.use     = PETSC_FALSE;
240226e093fcSHong Zhang   b->compressedrow.nrows   = 0;
240373e7a558SHong Zhang   b->compressedrow.i       = PETSC_NULL;
240473e7a558SHong Zhang   b->compressedrow.rindex  = PETSC_NULL;
240573e7a558SHong Zhang   b->compressedrow.checked = PETSC_FALSE;
240688e51ccdSHong Zhang   B->same_nonzero          = PETSC_FALSE;
24074e220ebcSLois Curfman McInnes 
2408*43516a2dSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2409*43516a2dSKris Buschelman                                      "MatInvertBlockDiagonal_SeqBAIJ",
2410*43516a2dSKris Buschelman                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2411f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
24123e90b805SBarry Smith                                      "MatStoreValues_SeqBAIJ",
2413bc4b532fSSatish Balay                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2414f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
24153e90b805SBarry Smith                                      "MatRetrieveValues_SeqBAIJ",
2416bc4b532fSSatish Balay                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2417f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
241827a8da17SBarry Smith                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2419bc4b532fSSatish Balay                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2420a6175056SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2421273d9f13SBarry Smith                                      "MatConvert_SeqBAIJ_SeqAIJ",
2422273d9f13SBarry Smith                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2423b24ad042SBarry Smith #if !defined(PETSC_USE_64BIT_INT)
2424a0e1a404SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2425a0e1a404SHong Zhang                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2426a0e1a404SHong Zhang                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2427b24ad042SBarry Smith #endif
2428a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2429a23d5eceSKris Buschelman                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2430a23d5eceSKris Buschelman                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
24313a40ed3dSBarry Smith   PetscFunctionReturn(0);
24322593348eSBarry Smith }
2433273d9f13SBarry Smith EXTERN_C_END
24342593348eSBarry Smith 
24354a2ae208SSatish Balay #undef __FUNCT__
24364a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2437dfbe8321SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
24382593348eSBarry Smith {
24392593348eSBarry Smith   Mat            C;
2440b6490206SBarry Smith   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
24416849ba73SBarry Smith   PetscErrorCode ierr;
2442c1ac3661SBarry Smith   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2443de6a44a3SBarry Smith 
24443a40ed3dSBarry Smith   PetscFunctionBegin;
244529bbc08cSBarry Smith   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
24462593348eSBarry Smith 
24472593348eSBarry Smith   *B = 0;
2448273d9f13SBarry Smith   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2449be5d1d56SKris Buschelman   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
24501d5dac46SHong Zhang   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2451273d9f13SBarry Smith   c    = (Mat_SeqBAIJ*)C->data;
245244cd7ae7SLois Curfman McInnes 
24534beb1cfeSHong Zhang   C->M   = A->M;
24544beb1cfeSHong Zhang   C->N   = A->N;
2455521d7252SBarry Smith   C->bs  = A->bs;
24569df24120SSatish Balay   c->bs2 = a->bs2;
2457b6490206SBarry Smith   c->mbs = a->mbs;
2458b6490206SBarry Smith   c->nbs = a->nbs;
24592593348eSBarry Smith 
2460c1ac3661SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2461690b6cddSBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2462b6490206SBarry Smith   for (i=0; i<mbs; i++) {
24632593348eSBarry Smith     c->imax[i] = a->imax[i];
24642593348eSBarry Smith     c->ilen[i] = a->ilen[i];
24652593348eSBarry Smith   }
24662593348eSBarry Smith 
24672593348eSBarry Smith   /* allocate the matrix space */
2468c4992f7dSBarry Smith   c->singlemalloc = PETSC_TRUE;
2469c1ac3661SBarry Smith   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2470b0a32e0cSBarry Smith   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2471690b6cddSBarry Smith   c->j = (PetscInt*)(c->a + nz*bs2);
2472de6a44a3SBarry Smith   c->i = c->j + nz;
2473c1ac3661SBarry Smith   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2474b6490206SBarry Smith   if (mbs > 0) {
2475c1ac3661SBarry Smith     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
24762e8a6d31SBarry Smith     if (cpvalues == MAT_COPY_VALUES) {
2477549d3d68SSatish Balay       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
24782e8a6d31SBarry Smith     } else {
2479549d3d68SSatish Balay       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
24802593348eSBarry Smith     }
24812593348eSBarry Smith   }
24822593348eSBarry Smith 
248352e6d16bSBarry Smith   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
24842593348eSBarry Smith   c->sorted      = a->sorted;
24852593348eSBarry Smith   c->roworiented = a->roworiented;
24862593348eSBarry Smith   c->nonew       = a->nonew;
24872593348eSBarry Smith 
24882593348eSBarry Smith   if (a->diag) {
2489c1ac3661SBarry Smith     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
249052e6d16bSBarry Smith     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2491b6490206SBarry Smith     for (i=0; i<mbs; i++) {
24922593348eSBarry Smith       c->diag[i] = a->diag[i];
24932593348eSBarry Smith     }
249498305bb5SBarry Smith   } else c->diag        = 0;
24952593348eSBarry Smith   c->nz                 = a->nz;
24962593348eSBarry Smith   c->maxnz              = a->maxnz;
24972593348eSBarry Smith   c->solve_work         = 0;
24987fc0212eSBarry Smith   c->mult_work          = 0;
2499273d9f13SBarry Smith   C->preallocated       = PETSC_TRUE;
2500273d9f13SBarry Smith   C->assembled          = PETSC_TRUE;
250188e51ccdSHong Zhang 
250288e51ccdSHong Zhang   c->compressedrow.use     = a->compressedrow.use;
250388e51ccdSHong Zhang   c->compressedrow.nrows   = a->compressedrow.nrows;
250488e51ccdSHong Zhang   c->compressedrow.checked = a->compressedrow.checked;
250588e51ccdSHong Zhang   if ( a->compressedrow.checked && a->compressedrow.use){
250688e51ccdSHong Zhang     i = a->compressedrow.nrows;
250788e51ccdSHong Zhang     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
250888e51ccdSHong Zhang     c->compressedrow.rindex = c->compressedrow.i + i + 1;
250988e51ccdSHong Zhang     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
251088e51ccdSHong Zhang     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
251188e51ccdSHong Zhang   } else {
251288e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
251388e51ccdSHong Zhang     c->compressedrow.i      = PETSC_NULL;
251488e51ccdSHong Zhang     c->compressedrow.rindex = PETSC_NULL;
251588e51ccdSHong Zhang   }
251688e51ccdSHong Zhang   C->same_nonzero = A->same_nonzero;
25172593348eSBarry Smith   *B = C;
2518b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
25193a40ed3dSBarry Smith   PetscFunctionReturn(0);
25202593348eSBarry Smith }
25212593348eSBarry Smith 
25224a2ae208SSatish Balay #undef __FUNCT__
25234a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ"
2524dfbe8321SBarry Smith PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
25252593348eSBarry Smith {
2526b6490206SBarry Smith   Mat_SeqBAIJ    *a;
25272593348eSBarry Smith   Mat            B;
25286849ba73SBarry Smith   PetscErrorCode ierr;
2529b24ad042SBarry Smith   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2530c1ac3661SBarry Smith   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2531c1ac3661SBarry Smith   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2532c1ac3661SBarry Smith   PetscInt       *masked,nmask,tmp,bs2,ishift;
2533b24ad042SBarry Smith   PetscMPIInt    size;
2534b24ad042SBarry Smith   int            fd;
253587828ca2SBarry Smith   PetscScalar    *aa;
253619bcc07fSBarry Smith   MPI_Comm       comm = ((PetscObject)viewer)->comm;
25372593348eSBarry Smith 
25383a40ed3dSBarry Smith   PetscFunctionBegin;
2539b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2540de6a44a3SBarry Smith   bs2  = bs*bs;
2541b6490206SBarry Smith 
2542d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254329bbc08cSBarry Smith   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2544b0a32e0cSBarry Smith   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
25450752156aSBarry Smith   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2546552e946dSBarry Smith   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
25472593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
25482593348eSBarry Smith 
2549d64ed03dSBarry Smith   if (header[3] < 0) {
255029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2551d64ed03dSBarry Smith   }
2552d64ed03dSBarry Smith 
255329bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
255435aab85fSBarry Smith 
255535aab85fSBarry Smith   /*
255635aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
255735aab85fSBarry Smith     divisible by the blocksize
255835aab85fSBarry Smith   */
2559b6490206SBarry Smith   mbs        = M/bs;
256035aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
256135aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
256235aab85fSBarry Smith   else                  mbs++;
256335aab85fSBarry Smith   if (extra_rows) {
256463ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
256535aab85fSBarry Smith   }
2566b6490206SBarry Smith 
25672593348eSBarry Smith   /* read in row lengths */
2568c1ac3661SBarry Smith   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
25690752156aSBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
257035aab85fSBarry Smith   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
25712593348eSBarry Smith 
2572b6490206SBarry Smith   /* read in column indices */
2573c1ac3661SBarry Smith   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
25740752156aSBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
257535aab85fSBarry Smith   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2576b6490206SBarry Smith 
2577b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2578c1ac3661SBarry Smith   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2579c1ac3661SBarry Smith   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2580c1ac3661SBarry Smith   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2581c1ac3661SBarry Smith   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
258235aab85fSBarry Smith   masked   = mask + mbs;
2583b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2584b6490206SBarry Smith   for (i=0; i<mbs; i++) {
258535aab85fSBarry Smith     nmask = 0;
2586b6490206SBarry Smith     for (j=0; j<bs; j++) {
2587b6490206SBarry Smith       kmax = rowlengths[rowcount];
2588b6490206SBarry Smith       for (k=0; k<kmax; k++) {
258935aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
259035aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2591b6490206SBarry Smith       }
2592b6490206SBarry Smith       rowcount++;
2593b6490206SBarry Smith     }
259435aab85fSBarry Smith     browlengths[i] += nmask;
259535aab85fSBarry Smith     /* zero out the mask elements we set */
259635aab85fSBarry Smith     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2597b6490206SBarry Smith   }
2598b6490206SBarry Smith 
25992593348eSBarry Smith   /* create our matrix */
2600dd23797bSKris Buschelman   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
260178ae41b4SKris Buschelman   ierr = MatSetType(B,type);CHKERRQ(ierr);
260278ae41b4SKris Buschelman   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2603b6490206SBarry Smith   a = (Mat_SeqBAIJ*)B->data;
26042593348eSBarry Smith 
26052593348eSBarry Smith   /* set matrix "i" values */
2606de6a44a3SBarry Smith   a->i[0] = 0;
2607b6490206SBarry Smith   for (i=1; i<= mbs; i++) {
2608b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2609b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
26102593348eSBarry Smith   }
2611b6490206SBarry Smith   a->nz         = 0;
2612b6490206SBarry Smith   for (i=0; i<mbs; i++) a->nz += browlengths[i];
26132593348eSBarry Smith 
2614b6490206SBarry Smith   /* read in nonzero values */
261587828ca2SBarry Smith   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
26160752156aSBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
261735aab85fSBarry Smith   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2618b6490206SBarry Smith 
2619b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2620b6490206SBarry Smith   nzcount = 0; jcount = 0;
2621b6490206SBarry Smith   for (i=0; i<mbs; i++) {
2622b6490206SBarry Smith     nzcountb = nzcount;
262335aab85fSBarry Smith     nmask    = 0;
2624b6490206SBarry Smith     for (j=0; j<bs; j++) {
2625b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2626b6490206SBarry Smith       for (k=0; k<kmax; k++) {
262735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
262835aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2629b6490206SBarry Smith       }
2630b6490206SBarry Smith     }
2631de6a44a3SBarry Smith     /* sort the masked values */
2632433994e6SBarry Smith     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2633de6a44a3SBarry Smith 
2634b6490206SBarry Smith     /* set "j" values into matrix */
2635b6490206SBarry Smith     maskcount = 1;
263635aab85fSBarry Smith     for (j=0; j<nmask; j++) {
263735aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2638de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2639b6490206SBarry Smith     }
2640b6490206SBarry Smith     /* set "a" values into matrix */
2641de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2642b6490206SBarry Smith     for (j=0; j<bs; j++) {
2643b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2644b6490206SBarry Smith       for (k=0; k<kmax; k++) {
2645de6a44a3SBarry Smith         tmp       = jj[nzcountb]/bs ;
2646de6a44a3SBarry Smith         block     = mask[tmp] - 1;
2647de6a44a3SBarry Smith         point     = jj[nzcountb] - bs*tmp;
2648de6a44a3SBarry Smith         idx       = ishift + bs2*block + j + bs*point;
2649375fe846SBarry Smith         a->a[idx] = (MatScalar)aa[nzcountb++];
2650b6490206SBarry Smith       }
2651b6490206SBarry Smith     }
265235aab85fSBarry Smith     /* zero out the mask elements we set */
265335aab85fSBarry Smith     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2654b6490206SBarry Smith   }
265529bbc08cSBarry Smith   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2656b6490206SBarry Smith 
2657606d414cSSatish Balay   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2658606d414cSSatish Balay   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2659606d414cSSatish Balay   ierr = PetscFree(aa);CHKERRQ(ierr);
2660606d414cSSatish Balay   ierr = PetscFree(jj);CHKERRQ(ierr);
2661606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
2662b6490206SBarry Smith 
266378ae41b4SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
266478ae41b4SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26659c01be13SBarry Smith   ierr = MatView_Private(B);CHKERRQ(ierr);
266678ae41b4SKris Buschelman 
266778ae41b4SKris Buschelman   *A = B;
26683a40ed3dSBarry Smith   PetscFunctionReturn(0);
26692593348eSBarry Smith }
26702593348eSBarry Smith 
26714a2ae208SSatish Balay #undef __FUNCT__
26724a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ"
2673273d9f13SBarry Smith /*@C
2674273d9f13SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2675273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
2676273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
2677273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
2678273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
26792593348eSBarry Smith 
2680273d9f13SBarry Smith    Collective on MPI_Comm
2681273d9f13SBarry Smith 
2682273d9f13SBarry Smith    Input Parameters:
2683273d9f13SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
2684273d9f13SBarry Smith .  bs - size of block
2685273d9f13SBarry Smith .  m - number of rows
2686273d9f13SBarry Smith .  n - number of columns
268735d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
268835d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
2689273d9f13SBarry Smith          (possibly different for each block row) or PETSC_NULL
2690273d9f13SBarry Smith 
2691273d9f13SBarry Smith    Output Parameter:
2692273d9f13SBarry Smith .  A - the matrix
2693273d9f13SBarry Smith 
2694273d9f13SBarry Smith    Options Database Keys:
2695273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2696273d9f13SBarry Smith                      block calculations (much slower)
2697273d9f13SBarry Smith .    -mat_block_size - size of the blocks to use
2698273d9f13SBarry Smith 
2699273d9f13SBarry Smith    Level: intermediate
2700273d9f13SBarry Smith 
2701273d9f13SBarry Smith    Notes:
2702d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
2703d1be2dadSMatthew Knepley 
270449a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
270549a6f317SBarry Smith 
270635d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
270735d8aa7fSBarry Smith 
2708273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
2709273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
2710273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
2711273d9f13SBarry Smith 
2712273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
2713273d9f13SBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2714273d9f13SBarry Smith    allocation.  For additional details, see the users manual chapter on
2715273d9f13SBarry Smith    matrices.
2716273d9f13SBarry Smith 
2717273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2718273d9f13SBarry Smith @*/
2719c1ac3661SBarry Smith PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2720273d9f13SBarry Smith {
2721dfbe8321SBarry Smith   PetscErrorCode ierr;
2722273d9f13SBarry Smith 
2723273d9f13SBarry Smith   PetscFunctionBegin;
2724273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2725273d9f13SBarry Smith   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2726273d9f13SBarry Smith   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2727273d9f13SBarry Smith   PetscFunctionReturn(0);
2728273d9f13SBarry Smith }
2729273d9f13SBarry Smith 
27304a2ae208SSatish Balay #undef __FUNCT__
27314a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2732273d9f13SBarry Smith /*@C
2733273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2734273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
2735273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
2736273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
2737273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
2738273d9f13SBarry Smith 
2739273d9f13SBarry Smith    Collective on MPI_Comm
2740273d9f13SBarry Smith 
2741273d9f13SBarry Smith    Input Parameters:
2742273d9f13SBarry Smith +  A - the matrix
2743273d9f13SBarry Smith .  bs - size of block
2744273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
2745273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
2746273d9f13SBarry Smith          (possibly different for each block row) or PETSC_NULL
2747273d9f13SBarry Smith 
2748273d9f13SBarry Smith    Options Database Keys:
2749273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2750273d9f13SBarry Smith                      block calculations (much slower)
2751273d9f13SBarry Smith .    -mat_block_size - size of the blocks to use
2752273d9f13SBarry Smith 
2753273d9f13SBarry Smith    Level: intermediate
2754273d9f13SBarry Smith 
2755273d9f13SBarry Smith    Notes:
275649a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
275749a6f317SBarry Smith 
2758273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
2759273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
2760273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
2761273d9f13SBarry Smith 
2762273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
2763273d9f13SBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2764273d9f13SBarry Smith    allocation.  For additional details, see the users manual chapter on
2765273d9f13SBarry Smith    matrices.
2766273d9f13SBarry Smith 
2767273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2768273d9f13SBarry Smith @*/
2769c1ac3661SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2770273d9f13SBarry Smith {
2771c1ac3661SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2772273d9f13SBarry Smith 
2773273d9f13SBarry Smith   PetscFunctionBegin;
2774a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2775a23d5eceSKris Buschelman   if (f) {
2776a23d5eceSKris Buschelman     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2777273d9f13SBarry Smith   }
2778273d9f13SBarry Smith   PetscFunctionReturn(0);
2779273d9f13SBarry Smith }
2780a1d92eedSBarry Smith 
2781