xref: /petsc/src/mat/impls/baij/seq/baij.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1*be1d678aSKris Buschelman #define PETSCMAT_DLL
2*be1d678aSKris Buschelman 
32593348eSBarry Smith /*
4b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
52593348eSBarry Smith   matrix storage format.
62593348eSBarry Smith */
770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
81a6a6d98SBarry Smith #include "src/inline/spops.h"
9325e03aeSBarry Smith #include "petscsys.h"                     /*I "petscmat.h" I*/
103b547af2SSatish Balay 
11b01c7715SBarry Smith #include "src/inline/ilu.h"
12b01c7715SBarry Smith 
13b01c7715SBarry Smith #undef __FUNCT__
14b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
15dfbe8321SBarry Smith PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
16b01c7715SBarry Smith {
17b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
186849ba73SBarry Smith   PetscErrorCode ierr;
19521d7252SBarry Smith   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
20b01c7715SBarry Smith   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
21b01c7715SBarry Smith 
22b01c7715SBarry Smith   PetscFunctionBegin;
23b01c7715SBarry Smith   if (a->idiagvalid) PetscFunctionReturn(0);
24b01c7715SBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
25b01c7715SBarry Smith   diag_offset = a->diag;
26b01c7715SBarry Smith   if (!a->idiag) {
27b01c7715SBarry Smith     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
28b01c7715SBarry Smith   }
29b01c7715SBarry Smith   diag  = a->idiag;
30b01c7715SBarry Smith   mdiag = a->idiag+bs*bs*mbs;
31b01c7715SBarry Smith   /* factor and invert each block */
32521d7252SBarry Smith   switch (bs){
33b01c7715SBarry Smith     case 2:
34b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
35b01c7715SBarry Smith         odiag   = v + 4*diag_offset[i];
36b01c7715SBarry Smith         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
37b01c7715SBarry Smith 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
38b01c7715SBarry Smith 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
39b01c7715SBarry Smith 	diag    += 4;
40b01c7715SBarry Smith 	mdiag   += 4;
41b01c7715SBarry Smith       }
42b01c7715SBarry Smith       break;
43b01c7715SBarry Smith     case 3:
44b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
45b01c7715SBarry Smith         odiag    = v + 9*diag_offset[i];
46b01c7715SBarry Smith         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
47b01c7715SBarry Smith         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
48b01c7715SBarry Smith         diag[8]  = odiag[8];
49b01c7715SBarry Smith         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
50b01c7715SBarry Smith         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
51b01c7715SBarry Smith         mdiag[8] = odiag[8];
52b01c7715SBarry Smith 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
53b01c7715SBarry Smith 	diag    += 9;
54b01c7715SBarry Smith 	mdiag   += 9;
55b01c7715SBarry Smith       }
56b01c7715SBarry Smith       break;
57b01c7715SBarry Smith     case 4:
58b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
59b01c7715SBarry Smith         odiag  = v + 16*diag_offset[i];
60b01c7715SBarry Smith         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
61b01c7715SBarry Smith         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
62b01c7715SBarry Smith 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
63b01c7715SBarry Smith 	diag  += 16;
64b01c7715SBarry Smith 	mdiag += 16;
65b01c7715SBarry Smith       }
66b01c7715SBarry Smith       break;
67b01c7715SBarry Smith     case 5:
68b01c7715SBarry Smith       for (i=0; i<mbs; i++) {
69b01c7715SBarry Smith         odiag = v + 25*diag_offset[i];
70b01c7715SBarry Smith         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
71b01c7715SBarry Smith         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
72b01c7715SBarry Smith 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
73b01c7715SBarry Smith 	diag  += 25;
74b01c7715SBarry Smith 	mdiag += 25;
75b01c7715SBarry Smith       }
76b01c7715SBarry Smith       break;
77b01c7715SBarry Smith     default:
78521d7252SBarry Smith       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
79b01c7715SBarry Smith   }
80b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
81b01c7715SBarry Smith   PetscFunctionReturn(0);
82b01c7715SBarry Smith }
83b01c7715SBarry Smith 
84b01c7715SBarry Smith #undef __FUNCT__
85b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
86c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
87b01c7715SBarry Smith {
88b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
89b01c7715SBarry Smith   PetscScalar        *x,x1,x2,s1,s2;
90b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
91dfbe8321SBarry Smith   PetscErrorCode     ierr;
92c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
93c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
94b01c7715SBarry Smith 
95b01c7715SBarry Smith   PetscFunctionBegin;
96b01c7715SBarry Smith   its = its*lits;
9777431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
98b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
99b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
100b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
101b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
102b01c7715SBarry Smith 
103b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
104b01c7715SBarry Smith 
105b01c7715SBarry Smith   diag  = a->diag;
106b01c7715SBarry Smith   idiag = a->idiag;
1071ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1081ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
109b01c7715SBarry Smith 
110b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
111b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
112b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
113b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
114b01c7715SBarry Smith       i2     = 2;
115b01c7715SBarry Smith       idiag += 4;
116b01c7715SBarry Smith       for (i=1; i<m; i++) {
117b01c7715SBarry Smith 	v     = aa + 4*ai[i];
118b01c7715SBarry Smith 	vi    = aj + ai[i];
119b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
120b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1];
121b01c7715SBarry Smith 	while (nz--) {
122b01c7715SBarry Smith 	  idx  = 2*(*vi++);
123b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx];
124b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[2]*x2;
125b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[3]*x2;
126b01c7715SBarry Smith 	  v   += 4;
127b01c7715SBarry Smith 	}
128b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
129b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
130b01c7715SBarry Smith         idiag   += 4;
131b01c7715SBarry Smith         i2      += 2;
132b01c7715SBarry Smith       }
133b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
134efee365bSSatish Balay       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
135b01c7715SBarry Smith     }
136b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
137b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
138b01c7715SBarry Smith       i2    = 0;
139b01c7715SBarry Smith       mdiag = a->idiag+4*a->mbs;
140b01c7715SBarry Smith       for (i=0; i<m; i++) {
141b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1];
142b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
143b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
144b01c7715SBarry Smith         mdiag  += 4;
145b01c7715SBarry Smith         i2     += 2;
146b01c7715SBarry Smith       }
147efee365bSSatish Balay       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
148b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
149b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
150b01c7715SBarry Smith     }
151b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
152b01c7715SBarry Smith       idiag   = a->idiag+4*a->mbs - 4;
153b01c7715SBarry Smith       i2      = 2*m - 2;
154b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1];
155b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
156b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
157b01c7715SBarry Smith       idiag -= 4;
158b01c7715SBarry Smith       i2    -= 2;
159b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
160b01c7715SBarry Smith 	v     = aa + 4*(diag[i]+1);
161b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
162b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
163b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1];
164b01c7715SBarry Smith 	while (nz--) {
165b01c7715SBarry Smith 	  idx  = 2*(*vi++);
166b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx];
167b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[2]*x2;
168b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[3]*x2;
169b01c7715SBarry Smith 	  v   += 4;
170b01c7715SBarry Smith 	}
171b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
172b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
173b01c7715SBarry Smith         idiag   -= 4;
174b01c7715SBarry Smith         i2      -= 2;
175b01c7715SBarry Smith       }
176efee365bSSatish Balay       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
177b01c7715SBarry Smith     }
178b01c7715SBarry Smith   } else {
179634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
180b01c7715SBarry Smith   }
1811ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1821ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
183b01c7715SBarry Smith   PetscFunctionReturn(0);
184b01c7715SBarry Smith }
185b01c7715SBarry Smith 
186b01c7715SBarry Smith #undef __FUNCT__
187b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
188c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
189b01c7715SBarry Smith {
190b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
191b01c7715SBarry Smith   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
192b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
193dfbe8321SBarry Smith   PetscErrorCode     ierr;
194c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
195c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
196b01c7715SBarry Smith 
197b01c7715SBarry Smith   PetscFunctionBegin;
198b01c7715SBarry Smith   its = its*lits;
19977431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
200b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
201b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
202b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
203b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
204b01c7715SBarry Smith 
205b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
206b01c7715SBarry Smith 
207b01c7715SBarry Smith   diag  = a->diag;
208b01c7715SBarry Smith   idiag = a->idiag;
2091ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2101ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211b01c7715SBarry Smith 
212b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
213b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
214b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
215b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
216b01c7715SBarry Smith       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
217b01c7715SBarry Smith       i2     = 3;
218b01c7715SBarry Smith       idiag += 9;
219b01c7715SBarry Smith       for (i=1; i<m; i++) {
220b01c7715SBarry Smith 	v     = aa + 9*ai[i];
221b01c7715SBarry Smith 	vi    = aj + ai[i];
222b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
223b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
224b01c7715SBarry Smith 	while (nz--) {
225b01c7715SBarry Smith 	  idx  = 3*(*vi++);
226b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
227b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
228b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
229b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
230b01c7715SBarry Smith 	  v   += 9;
231b01c7715SBarry Smith 	}
232b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
233b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
234b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
235b01c7715SBarry Smith         idiag   += 9;
236b01c7715SBarry Smith         i2      += 3;
237b01c7715SBarry Smith       }
238b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
239efee365bSSatish Balay       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
240b01c7715SBarry Smith     }
241b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
242b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
243b01c7715SBarry Smith       i2    = 0;
244b01c7715SBarry Smith       mdiag = a->idiag+9*a->mbs;
245b01c7715SBarry Smith       for (i=0; i<m; i++) {
246b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
247b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
248b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
249b01c7715SBarry Smith         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
250b01c7715SBarry Smith         mdiag  += 9;
251b01c7715SBarry Smith         i2     += 3;
252b01c7715SBarry Smith       }
253efee365bSSatish Balay       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
254b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
255b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
256b01c7715SBarry Smith     }
257b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
258b01c7715SBarry Smith       idiag   = a->idiag+9*a->mbs - 9;
259b01c7715SBarry Smith       i2      = 3*m - 3;
260b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
261b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
262b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
263b01c7715SBarry Smith       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
264b01c7715SBarry Smith       idiag -= 9;
265b01c7715SBarry Smith       i2    -= 3;
266b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
267b01c7715SBarry Smith 	v     = aa + 9*(diag[i]+1);
268b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
269b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
270b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
271b01c7715SBarry Smith 	while (nz--) {
272b01c7715SBarry Smith 	  idx  = 3*(*vi++);
273b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
274b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
275b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
276b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
277b01c7715SBarry Smith 	  v   += 9;
278b01c7715SBarry Smith 	}
279b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
280b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
281b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
282b01c7715SBarry Smith         idiag   -= 9;
283b01c7715SBarry Smith         i2      -= 3;
284b01c7715SBarry Smith       }
285efee365bSSatish Balay       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
286b01c7715SBarry Smith     }
287b01c7715SBarry Smith   } else {
288634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
289b01c7715SBarry Smith   }
2901ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2911ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
292b01c7715SBarry Smith   PetscFunctionReturn(0);
293b01c7715SBarry Smith }
294b01c7715SBarry Smith 
295b01c7715SBarry Smith #undef __FUNCT__
296b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
297c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
298b01c7715SBarry Smith {
299b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
300b01c7715SBarry Smith   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
301b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
302dfbe8321SBarry Smith   PetscErrorCode     ierr;
303c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
304c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
305b01c7715SBarry Smith 
306b01c7715SBarry Smith   PetscFunctionBegin;
307b01c7715SBarry Smith   its = its*lits;
30877431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
309b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
310b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
311b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
312b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
313b01c7715SBarry Smith 
314b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
315b01c7715SBarry Smith 
316b01c7715SBarry Smith   diag  = a->diag;
317b01c7715SBarry Smith   idiag = a->idiag;
3181ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3191ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
320b01c7715SBarry Smith 
321b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
322b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
323b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
324b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
325b01c7715SBarry Smith       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
326b01c7715SBarry Smith       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
327b01c7715SBarry Smith       i2     = 4;
328b01c7715SBarry Smith       idiag += 16;
329b01c7715SBarry Smith       for (i=1; i<m; i++) {
330b01c7715SBarry Smith 	v     = aa + 16*ai[i];
331b01c7715SBarry Smith 	vi    = aj + ai[i];
332b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
333b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
334b01c7715SBarry Smith 	while (nz--) {
335b01c7715SBarry Smith 	  idx  = 4*(*vi++);
336b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
337b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
338b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
339b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
340b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
341b01c7715SBarry Smith 	  v   += 16;
342b01c7715SBarry Smith 	}
343b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
344b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
345b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
346b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
347b01c7715SBarry Smith         idiag   += 16;
348b01c7715SBarry Smith         i2      += 4;
349b01c7715SBarry Smith       }
350b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
351efee365bSSatish Balay       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
352b01c7715SBarry Smith     }
353b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
354b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
355b01c7715SBarry Smith       i2    = 0;
356b01c7715SBarry Smith       mdiag = a->idiag+16*a->mbs;
357b01c7715SBarry Smith       for (i=0; i<m; i++) {
358b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
359b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
360b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
361b01c7715SBarry Smith         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
362b01c7715SBarry Smith         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
363b01c7715SBarry Smith         mdiag  += 16;
364b01c7715SBarry Smith         i2     += 4;
365b01c7715SBarry Smith       }
366efee365bSSatish Balay       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
367b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
369b01c7715SBarry Smith     }
370b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
371b01c7715SBarry Smith       idiag   = a->idiag+16*a->mbs - 16;
372b01c7715SBarry Smith       i2      = 4*m - 4;
373b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
374b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
375b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
376b01c7715SBarry Smith       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
377b01c7715SBarry Smith       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
378b01c7715SBarry Smith       idiag -= 16;
379b01c7715SBarry Smith       i2    -= 4;
380b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
381b01c7715SBarry Smith 	v     = aa + 16*(diag[i]+1);
382b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
383b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
384b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
385b01c7715SBarry Smith 	while (nz--) {
386b01c7715SBarry Smith 	  idx  = 4*(*vi++);
387b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
388b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
389b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
390b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
391b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
392b01c7715SBarry Smith 	  v   += 16;
393b01c7715SBarry Smith 	}
394b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
395b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
396b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
397b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
398b01c7715SBarry Smith         idiag   -= 16;
399b01c7715SBarry Smith         i2      -= 4;
400b01c7715SBarry Smith       }
401efee365bSSatish Balay       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
402b01c7715SBarry Smith     }
403b01c7715SBarry Smith   } else {
404634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
405b01c7715SBarry Smith   }
4061ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4071ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
408b01c7715SBarry Smith   PetscFunctionReturn(0);
409b01c7715SBarry Smith }
410b01c7715SBarry Smith 
411b01c7715SBarry Smith #undef __FUNCT__
412b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
413c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
414b01c7715SBarry Smith {
415b01c7715SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
416b01c7715SBarry Smith   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
417b01c7715SBarry Smith   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
418dfbe8321SBarry Smith   PetscErrorCode     ierr;
419c1ac3661SBarry Smith   PetscInt           m = a->mbs,i,i2,nz,idx;
420c1ac3661SBarry Smith   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
421b01c7715SBarry Smith 
422b01c7715SBarry Smith   PetscFunctionBegin;
423b01c7715SBarry Smith   its = its*lits;
42477431f27SBarry Smith   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
425b01c7715SBarry Smith   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
426b01c7715SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
427b01c7715SBarry Smith   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
428b01c7715SBarry Smith   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
429b01c7715SBarry Smith 
430b01c7715SBarry Smith   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
431b01c7715SBarry Smith 
432b01c7715SBarry Smith   diag  = a->diag;
433b01c7715SBarry Smith   idiag = a->idiag;
4341ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4351ebc52fbSHong Zhang   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
436b01c7715SBarry Smith 
437b01c7715SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
438b01c7715SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
439b01c7715SBarry Smith       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
440b01c7715SBarry Smith       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
441b01c7715SBarry Smith       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
442b01c7715SBarry Smith       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
443b01c7715SBarry Smith       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
444b01c7715SBarry Smith       i2     = 5;
445b01c7715SBarry Smith       idiag += 25;
446b01c7715SBarry Smith       for (i=1; i<m; i++) {
447b01c7715SBarry Smith 	v     = aa + 25*ai[i];
448b01c7715SBarry Smith 	vi    = aj + ai[i];
449b01c7715SBarry Smith 	nz    = diag[i] - ai[i];
450b01c7715SBarry Smith 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
451b01c7715SBarry Smith 	while (nz--) {
452b01c7715SBarry Smith 	  idx  = 5*(*vi++);
453b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
454b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
455b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
456b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
457b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
458b01c7715SBarry Smith 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
459b01c7715SBarry Smith 	  v   += 25;
460b01c7715SBarry Smith 	}
461b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
462b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
463b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
464b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
465b01c7715SBarry Smith 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
466b01c7715SBarry Smith         idiag   += 25;
467b01c7715SBarry Smith         i2      += 5;
468b01c7715SBarry Smith       }
469b01c7715SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
470efee365bSSatish Balay       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
471b01c7715SBarry Smith     }
472b01c7715SBarry Smith     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
473b01c7715SBarry Smith         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
474b01c7715SBarry Smith       i2    = 0;
475b01c7715SBarry Smith       mdiag = a->idiag+25*a->mbs;
476b01c7715SBarry Smith       for (i=0; i<m; i++) {
477b01c7715SBarry Smith         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
478b01c7715SBarry Smith         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
479b01c7715SBarry Smith         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
480b01c7715SBarry Smith         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
481b01c7715SBarry Smith         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
482b01c7715SBarry Smith         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
483b01c7715SBarry Smith         mdiag  += 25;
484b01c7715SBarry Smith         i2     += 5;
485b01c7715SBarry Smith       }
486efee365bSSatish Balay       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
487b01c7715SBarry Smith     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
488b01c7715SBarry Smith       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
489b01c7715SBarry Smith     }
490b01c7715SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
491b01c7715SBarry Smith       idiag   = a->idiag+25*a->mbs - 25;
492b01c7715SBarry Smith       i2      = 5*m - 5;
493b01c7715SBarry Smith       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
494b01c7715SBarry Smith       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
495b01c7715SBarry Smith       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
496b01c7715SBarry Smith       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
497b01c7715SBarry Smith       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
498b01c7715SBarry Smith       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
499b01c7715SBarry Smith       idiag -= 25;
500b01c7715SBarry Smith       i2    -= 5;
501b01c7715SBarry Smith       for (i=m-2; i>=0; i--) {
502b01c7715SBarry Smith 	v     = aa + 25*(diag[i]+1);
503b01c7715SBarry Smith 	vi    = aj + diag[i] + 1;
504b01c7715SBarry Smith 	nz    = ai[i+1] - diag[i] - 1;
505b01c7715SBarry Smith 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
506b01c7715SBarry Smith 	while (nz--) {
507b01c7715SBarry Smith 	  idx  = 5*(*vi++);
508b01c7715SBarry Smith 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
509b01c7715SBarry Smith 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
510b01c7715SBarry Smith 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
511b01c7715SBarry Smith 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
512b01c7715SBarry Smith 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
513b01c7715SBarry Smith 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
514b01c7715SBarry Smith 	  v   += 25;
515b01c7715SBarry Smith 	}
516b01c7715SBarry Smith 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
517b01c7715SBarry Smith 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
518b01c7715SBarry Smith 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
519b01c7715SBarry Smith 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
520b01c7715SBarry Smith 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
521b01c7715SBarry Smith         idiag   -= 25;
522b01c7715SBarry Smith         i2      -= 5;
523b01c7715SBarry Smith       }
524efee365bSSatish Balay       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
525b01c7715SBarry Smith     }
526b01c7715SBarry Smith   } else {
527634064b4SBarry Smith     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
528b01c7715SBarry Smith   }
5291ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5301ebc52fbSHong Zhang   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
531b01c7715SBarry Smith   PetscFunctionReturn(0);
532b01c7715SBarry Smith }
533b01c7715SBarry Smith 
534af674e45SBarry Smith /*
535af674e45SBarry Smith     Special version for Fun3d sequential benchmark
536af674e45SBarry Smith */
537af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
538af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
539af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
540af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
541af674e45SBarry Smith #endif
542af674e45SBarry Smith 
5439c8c1041SBarry Smith EXTERN_C_BEGIN
544af674e45SBarry Smith #undef __FUNCT__
545af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_"
546*be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
547af674e45SBarry Smith {
548af674e45SBarry Smith   Mat               A = *AA;
549af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
550c1ac3661SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
551c1ac3661SBarry Smith   PetscInt          *ai=a->i,*ailen=a->ilen;
552c1ac3661SBarry Smith   PetscInt          *aj=a->j,stepval;
553f15d580aSBarry Smith   const PetscScalar *value = v;
5544bb09213Spetsc   MatScalar         *ap,*aa = a->a,*bap;
555af674e45SBarry Smith 
556af674e45SBarry Smith   PetscFunctionBegin;
557af674e45SBarry Smith   stepval = (n-1)*4;
558af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
559af674e45SBarry Smith     row  = im[k];
560af674e45SBarry Smith     rp   = aj + ai[row];
561af674e45SBarry Smith     ap   = aa + 16*ai[row];
562af674e45SBarry Smith     nrow = ailen[row];
563af674e45SBarry Smith     low  = 0;
564af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
565af674e45SBarry Smith       col = in[l];
566af674e45SBarry Smith       value = v + k*(stepval+4)*4 + l*4;
567af674e45SBarry Smith       low = 0; high = nrow;
568af674e45SBarry Smith       while (high-low > 7) {
569af674e45SBarry Smith         t = (low+high)/2;
570af674e45SBarry Smith         if (rp[t] > col) high = t;
571af674e45SBarry Smith         else             low  = t;
572af674e45SBarry Smith       }
573af674e45SBarry Smith       for (i=low; i<high; i++) {
574af674e45SBarry Smith         if (rp[i] > col) break;
575af674e45SBarry Smith         if (rp[i] == col) {
576af674e45SBarry Smith           bap  = ap +  16*i;
577af674e45SBarry Smith           for (ii=0; ii<4; ii++,value+=stepval) {
578af674e45SBarry Smith             for (jj=ii; jj<16; jj+=4) {
579af674e45SBarry Smith               bap[jj] += *value++;
580af674e45SBarry Smith             }
581af674e45SBarry Smith           }
582af674e45SBarry Smith           goto noinsert2;
583af674e45SBarry Smith         }
584af674e45SBarry Smith       }
585af674e45SBarry Smith       N = nrow++ - 1;
586af674e45SBarry Smith       /* shift up all the later entries in this row */
587af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
588af674e45SBarry Smith         rp[ii+1] = rp[ii];
589a037b02bSBarry Smith         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
590af674e45SBarry Smith       }
591af674e45SBarry Smith       if (N >= i) {
592a037b02bSBarry Smith         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
593af674e45SBarry Smith       }
594af674e45SBarry Smith       rp[i] = col;
595af674e45SBarry Smith       bap   = ap +  16*i;
596af674e45SBarry Smith       for (ii=0; ii<4; ii++,value+=stepval) {
597af674e45SBarry Smith         for (jj=ii; jj<16; jj+=4) {
598af674e45SBarry Smith           bap[jj] = *value++;
599af674e45SBarry Smith         }
600af674e45SBarry Smith       }
601af674e45SBarry Smith       noinsert2:;
602af674e45SBarry Smith       low = i;
603af674e45SBarry Smith     }
604af674e45SBarry Smith     ailen[row] = nrow;
605af674e45SBarry Smith   }
606*be1d678aSKris Buschelman   PetscFunctionReturnVoid();
607af674e45SBarry Smith }
6089c8c1041SBarry Smith EXTERN_C_END
609af674e45SBarry Smith 
610af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
611af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
612af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
613af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
614af674e45SBarry Smith #endif
615af674e45SBarry Smith 
6169c8c1041SBarry Smith EXTERN_C_BEGIN
617af674e45SBarry Smith #undef __FUNCT__
618af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_"
619*be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
620af674e45SBarry Smith {
621af674e45SBarry Smith   Mat         A = *AA;
622af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
623c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
624c1ac3661SBarry Smith   PetscInt    *ai=a->i,*ailen=a->ilen;
625c1ac3661SBarry Smith   PetscInt    *aj=a->j,brow,bcol;
626c1ac3661SBarry Smith   PetscInt    ridx,cidx;
627af674e45SBarry Smith   MatScalar   *ap,value,*aa=a->a,*bap;
628af674e45SBarry Smith 
629af674e45SBarry Smith   PetscFunctionBegin;
630af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
631af674e45SBarry Smith     row  = im[k]; brow = row/4;
632af674e45SBarry Smith     rp   = aj + ai[brow];
633af674e45SBarry Smith     ap   = aa + 16*ai[brow];
634af674e45SBarry Smith     nrow = ailen[brow];
635af674e45SBarry Smith     low  = 0;
636af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
637af674e45SBarry Smith       col = in[l]; bcol = col/4;
638af674e45SBarry Smith       ridx = row % 4; cidx = col % 4;
639af674e45SBarry Smith       value = v[l + k*n];
640af674e45SBarry Smith       low = 0; high = nrow;
641af674e45SBarry Smith       while (high-low > 7) {
642af674e45SBarry Smith         t = (low+high)/2;
643af674e45SBarry Smith         if (rp[t] > bcol) high = t;
644af674e45SBarry Smith         else              low  = t;
645af674e45SBarry Smith       }
646af674e45SBarry Smith       for (i=low; i<high; i++) {
647af674e45SBarry Smith         if (rp[i] > bcol) break;
648af674e45SBarry Smith         if (rp[i] == bcol) {
649af674e45SBarry Smith           bap  = ap +  16*i + 4*cidx + ridx;
650af674e45SBarry Smith           *bap += value;
651af674e45SBarry Smith           goto noinsert1;
652af674e45SBarry Smith         }
653af674e45SBarry Smith       }
654af674e45SBarry Smith       N = nrow++ - 1;
655af674e45SBarry Smith       /* shift up all the later entries in this row */
656af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
657af674e45SBarry Smith         rp[ii+1] = rp[ii];
658a037b02bSBarry Smith         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
659af674e45SBarry Smith       }
660af674e45SBarry Smith       if (N>=i) {
661a037b02bSBarry Smith         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
662af674e45SBarry Smith       }
663af674e45SBarry Smith       rp[i]                    = bcol;
664af674e45SBarry Smith       ap[16*i + 4*cidx + ridx] = value;
665af674e45SBarry Smith       noinsert1:;
666af674e45SBarry Smith       low = i;
667af674e45SBarry Smith     }
668af674e45SBarry Smith     ailen[brow] = nrow;
669af674e45SBarry Smith   }
670*be1d678aSKris Buschelman   PetscFunctionReturnVoid();
671af674e45SBarry Smith }
6729c8c1041SBarry Smith EXTERN_C_END
673af674e45SBarry Smith 
67495d5f7c2SBarry Smith /*  UGLY, ugly, ugly
67587828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
6763477af42SKris Buschelman    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
67795d5f7c2SBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
67895d5f7c2SBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
67995d5f7c2SBarry Smith    into the single precision data structures.
68095d5f7c2SBarry Smith */
68195d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
682690b6cddSBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
68395d5f7c2SBarry Smith #else
68495d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
68595d5f7c2SBarry Smith #endif
68695d5f7c2SBarry Smith 
6872d61bbb3SSatish Balay #define CHUNKSIZE  10
688de6a44a3SBarry Smith 
689be5855fcSBarry Smith /*
690be5855fcSBarry Smith      Checks for missing diagonals
691be5855fcSBarry Smith */
6924a2ae208SSatish Balay #undef __FUNCT__
6934a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
694dfbe8321SBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
695be5855fcSBarry Smith {
696be5855fcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
6976849ba73SBarry Smith   PetscErrorCode ierr;
698c1ac3661SBarry Smith   PetscInt       *diag,*jj = a->j,i;
699be5855fcSBarry Smith 
700be5855fcSBarry Smith   PetscFunctionBegin;
701c4992f7dSBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
702883fce79SBarry Smith   diag = a->diag;
7030e8e8aceSBarry Smith   for (i=0; i<a->mbs; i++) {
704be5855fcSBarry Smith     if (jj[diag[i]] != i) {
70577431f27SBarry Smith       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
706be5855fcSBarry Smith     }
707be5855fcSBarry Smith   }
708be5855fcSBarry Smith   PetscFunctionReturn(0);
709be5855fcSBarry Smith }
710be5855fcSBarry Smith 
7114a2ae208SSatish Balay #undef __FUNCT__
7124a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
713dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
714de6a44a3SBarry Smith {
715de6a44a3SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
7166849ba73SBarry Smith   PetscErrorCode ierr;
717c1ac3661SBarry Smith   PetscInt       i,j,*diag,m = a->mbs;
718de6a44a3SBarry Smith 
7193a40ed3dSBarry Smith   PetscFunctionBegin;
720883fce79SBarry Smith   if (a->diag) PetscFunctionReturn(0);
721883fce79SBarry Smith 
722c1ac3661SBarry Smith   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
72352e6d16bSBarry Smith   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
7247fc0212eSBarry Smith   for (i=0; i<m; i++) {
725ecc615b2SBarry Smith     diag[i] = a->i[i+1];
726de6a44a3SBarry Smith     for (j=a->i[i]; j<a->i[i+1]; j++) {
727de6a44a3SBarry Smith       if (a->j[j] == i) {
728de6a44a3SBarry Smith         diag[i] = j;
729de6a44a3SBarry Smith         break;
730de6a44a3SBarry Smith       }
731de6a44a3SBarry Smith     }
732de6a44a3SBarry Smith   }
733de6a44a3SBarry Smith   a->diag = diag;
7343a40ed3dSBarry Smith   PetscFunctionReturn(0);
735de6a44a3SBarry Smith }
7362593348eSBarry Smith 
7372593348eSBarry Smith 
738690b6cddSBarry Smith EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
7393b2fbd54SBarry Smith 
7404a2ae208SSatish Balay #undef __FUNCT__
7414a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
742c1ac3661SBarry Smith static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
7433b2fbd54SBarry Smith {
7443b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
745dfbe8321SBarry Smith   PetscErrorCode ierr;
746c1ac3661SBarry Smith   PetscInt       n = a->mbs,i;
7473b2fbd54SBarry Smith 
7483a40ed3dSBarry Smith   PetscFunctionBegin;
7493b2fbd54SBarry Smith   *nn = n;
7503a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
7513b2fbd54SBarry Smith   if (symmetric) {
7523b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
7533b2fbd54SBarry Smith   } else if (oshift == 1) {
7543b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
755c1ac3661SBarry Smith     PetscInt nz = a->i[n];
7563b2fbd54SBarry Smith     for (i=0; i<nz; i++) a->j[i]++;
7573b2fbd54SBarry Smith     for (i=0; i<n+1; i++) a->i[i]++;
7583b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
7593b2fbd54SBarry Smith   } else {
7603b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
7613b2fbd54SBarry Smith   }
7623b2fbd54SBarry Smith 
7633a40ed3dSBarry Smith   PetscFunctionReturn(0);
7643b2fbd54SBarry Smith }
7653b2fbd54SBarry Smith 
7664a2ae208SSatish Balay #undef __FUNCT__
7674a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
768c1ac3661SBarry Smith static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
7693b2fbd54SBarry Smith {
7703b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
7716849ba73SBarry Smith   PetscErrorCode ierr;
772c1ac3661SBarry Smith   PetscInt       i,n = a->mbs;
7733b2fbd54SBarry Smith 
7743a40ed3dSBarry Smith   PetscFunctionBegin;
7753a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
7763b2fbd54SBarry Smith   if (symmetric) {
777606d414cSSatish Balay     ierr = PetscFree(*ia);CHKERRQ(ierr);
778606d414cSSatish Balay     ierr = PetscFree(*ja);CHKERRQ(ierr);
779af5da2bfSBarry Smith   } else if (oshift == 1) {
780c1ac3661SBarry Smith     PetscInt nz = a->i[n]-1;
7813b2fbd54SBarry Smith     for (i=0; i<nz; i++) a->j[i]--;
7823b2fbd54SBarry Smith     for (i=0; i<n+1; i++) a->i[i]--;
7833b2fbd54SBarry Smith   }
7843a40ed3dSBarry Smith   PetscFunctionReturn(0);
7853b2fbd54SBarry Smith }
7863b2fbd54SBarry Smith 
7874a2ae208SSatish Balay #undef __FUNCT__
7884a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ"
789dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
7902d61bbb3SSatish Balay {
7912d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
792dfbe8321SBarry Smith   PetscErrorCode ierr;
7932d61bbb3SSatish Balay 
794433994e6SBarry Smith   PetscFunctionBegin;
795aa482453SBarry Smith #if defined(PETSC_USE_LOG)
79677431f27SBarry Smith   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
7972d61bbb3SSatish Balay #endif
798606d414cSSatish Balay   ierr = PetscFree(a->a);CHKERRQ(ierr);
799606d414cSSatish Balay   if (!a->singlemalloc) {
800606d414cSSatish Balay     ierr = PetscFree(a->i);CHKERRQ(ierr);
801606d414cSSatish Balay     ierr = PetscFree(a->j);CHKERRQ(ierr);
802606d414cSSatish Balay   }
803c38d4ed2SBarry Smith   if (a->row) {
804c38d4ed2SBarry Smith     ierr = ISDestroy(a->row);CHKERRQ(ierr);
805c38d4ed2SBarry Smith   }
806c38d4ed2SBarry Smith   if (a->col) {
807c38d4ed2SBarry Smith     ierr = ISDestroy(a->col);CHKERRQ(ierr);
808c38d4ed2SBarry Smith   }
809606d414cSSatish Balay   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
810a1d92eedSBarry Smith   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
811606d414cSSatish Balay   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
812606d414cSSatish Balay   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
813606d414cSSatish Balay   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
814606d414cSSatish Balay   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
815e51c0b9cSSatish Balay   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
816606d414cSSatish Balay   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
817563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
818563b5814SBarry Smith   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
819563b5814SBarry Smith #endif
820c4319e64SHong Zhang   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
82173e7a558SHong Zhang   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
822c4319e64SHong Zhang 
823606d414cSSatish Balay   ierr = PetscFree(a);CHKERRQ(ierr);
824901853e0SKris Buschelman 
825901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
826901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
827901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
828901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
829901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
830901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
8312d61bbb3SSatish Balay   PetscFunctionReturn(0);
8322d61bbb3SSatish Balay }
8332d61bbb3SSatish Balay 
8344a2ae208SSatish Balay #undef __FUNCT__
8354a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ"
836dfbe8321SBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
8372d61bbb3SSatish Balay {
8382d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
83963ba0a88SBarry Smith   PetscErrorCode ierr;
8402d61bbb3SSatish Balay 
8412d61bbb3SSatish Balay   PetscFunctionBegin;
842aa275fccSKris Buschelman   switch (op) {
843aa275fccSKris Buschelman   case MAT_ROW_ORIENTED:
844aa275fccSKris Buschelman     a->roworiented    = PETSC_TRUE;
845aa275fccSKris Buschelman     break;
846aa275fccSKris Buschelman   case MAT_COLUMN_ORIENTED:
847aa275fccSKris Buschelman     a->roworiented    = PETSC_FALSE;
848aa275fccSKris Buschelman     break;
849aa275fccSKris Buschelman   case MAT_COLUMNS_SORTED:
850aa275fccSKris Buschelman     a->sorted         = PETSC_TRUE;
851aa275fccSKris Buschelman     break;
852aa275fccSKris Buschelman   case MAT_COLUMNS_UNSORTED:
853aa275fccSKris Buschelman     a->sorted         = PETSC_FALSE;
854aa275fccSKris Buschelman     break;
855aa275fccSKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
856aa275fccSKris Buschelman     a->keepzeroedrows = PETSC_TRUE;
857aa275fccSKris Buschelman     break;
858aa275fccSKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
859aa275fccSKris Buschelman     a->nonew          = 1;
860aa275fccSKris Buschelman     break;
861aa275fccSKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
862aa275fccSKris Buschelman     a->nonew          = -1;
863aa275fccSKris Buschelman     break;
864aa275fccSKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
865aa275fccSKris Buschelman     a->nonew          = -2;
866aa275fccSKris Buschelman     break;
867aa275fccSKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
868aa275fccSKris Buschelman     a->nonew          = 0;
869aa275fccSKris Buschelman     break;
870aa275fccSKris Buschelman   case MAT_ROWS_SORTED:
871aa275fccSKris Buschelman   case MAT_ROWS_UNSORTED:
872aa275fccSKris Buschelman   case MAT_YES_NEW_DIAGONALS:
873aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
874aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
87563ba0a88SBarry Smith     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
876aa275fccSKris Buschelman     break;
877aa275fccSKris Buschelman   case MAT_NO_NEW_DIAGONALS:
87829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
87977e54ba9SKris Buschelman   case MAT_SYMMETRIC:
88077e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
8819a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
8829a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
8839a4540c5SBarry Smith   case MAT_HERMITIAN:
8849a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
8859a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
8869a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
88777e54ba9SKris Buschelman     break;
888aa275fccSKris Buschelman   default:
88929bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
8902d61bbb3SSatish Balay   }
8912d61bbb3SSatish Balay   PetscFunctionReturn(0);
8922d61bbb3SSatish Balay }
8932d61bbb3SSatish Balay 
8944a2ae208SSatish Balay #undef __FUNCT__
8954a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ"
896c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
8972d61bbb3SSatish Balay {
8982d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
8996849ba73SBarry Smith   PetscErrorCode ierr;
900c1ac3661SBarry Smith   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
9013f1db9ecSBarry Smith   MatScalar      *aa,*aa_i;
90287828ca2SBarry Smith   PetscScalar    *v_i;
9032d61bbb3SSatish Balay 
9042d61bbb3SSatish Balay   PetscFunctionBegin;
905521d7252SBarry Smith   bs  = A->bs;
9062d61bbb3SSatish Balay   ai  = a->i;
9072d61bbb3SSatish Balay   aj  = a->j;
9082d61bbb3SSatish Balay   aa  = a->a;
9092d61bbb3SSatish Balay   bs2 = a->bs2;
9102d61bbb3SSatish Balay 
91177431f27SBarry Smith   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
9122d61bbb3SSatish Balay 
9132d61bbb3SSatish Balay   bn  = row/bs;   /* Block number */
9142d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
9152d61bbb3SSatish Balay   M   = ai[bn+1] - ai[bn];
9162d61bbb3SSatish Balay   *nz = bs*M;
9172d61bbb3SSatish Balay 
9182d61bbb3SSatish Balay   if (v) {
9192d61bbb3SSatish Balay     *v = 0;
9202d61bbb3SSatish Balay     if (*nz) {
92187828ca2SBarry Smith       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
9222d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
9232d61bbb3SSatish Balay         v_i  = *v + i*bs;
9242d61bbb3SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
9252d61bbb3SSatish Balay         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
9262d61bbb3SSatish Balay       }
9272d61bbb3SSatish Balay     }
9282d61bbb3SSatish Balay   }
9292d61bbb3SSatish Balay 
9302d61bbb3SSatish Balay   if (idx) {
9312d61bbb3SSatish Balay     *idx = 0;
9322d61bbb3SSatish Balay     if (*nz) {
933c1ac3661SBarry Smith       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
9342d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
9352d61bbb3SSatish Balay         idx_i = *idx + i*bs;
9362d61bbb3SSatish Balay         itmp  = bs*aj[ai[bn] + i];
9372d61bbb3SSatish Balay         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
9382d61bbb3SSatish Balay       }
9392d61bbb3SSatish Balay     }
9402d61bbb3SSatish Balay   }
9412d61bbb3SSatish Balay   PetscFunctionReturn(0);
9422d61bbb3SSatish Balay }
9432d61bbb3SSatish Balay 
9444a2ae208SSatish Balay #undef __FUNCT__
9454a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
946c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
9472d61bbb3SSatish Balay {
948dfbe8321SBarry Smith   PetscErrorCode ierr;
949606d414cSSatish Balay 
9502d61bbb3SSatish Balay   PetscFunctionBegin;
951606d414cSSatish Balay   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
952606d414cSSatish Balay   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
9532d61bbb3SSatish Balay   PetscFunctionReturn(0);
9542d61bbb3SSatish Balay }
9552d61bbb3SSatish Balay 
9564a2ae208SSatish Balay #undef __FUNCT__
9574a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ"
958dfbe8321SBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
9592d61bbb3SSatish Balay {
9602d61bbb3SSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
9612d61bbb3SSatish Balay   Mat            C;
9626849ba73SBarry Smith   PetscErrorCode ierr;
963521d7252SBarry Smith   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
964c1ac3661SBarry Smith   PetscInt       *rows,*cols,bs2=a->bs2;
96587828ca2SBarry Smith   PetscScalar    *array;
9662d61bbb3SSatish Balay 
9672d61bbb3SSatish Balay   PetscFunctionBegin;
96829bbc08cSBarry Smith   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
969c1ac3661SBarry Smith   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
970c1ac3661SBarry Smith   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
9712d61bbb3SSatish Balay 
972375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
97387828ca2SBarry Smith   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
97487828ca2SBarry Smith   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
975375fe846SBarry Smith #else
9763eda8832SBarry Smith   array = a->a;
977375fe846SBarry Smith #endif
978375fe846SBarry Smith 
9792d61bbb3SSatish Balay   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
980f204ca49SKris Buschelman   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
981f204ca49SKris Buschelman   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
982f204ca49SKris Buschelman   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
983606d414cSSatish Balay   ierr = PetscFree(col);CHKERRQ(ierr);
984c1ac3661SBarry Smith   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
9852d61bbb3SSatish Balay   cols = rows + bs;
9862d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
9872d61bbb3SSatish Balay     cols[0] = i*bs;
9882d61bbb3SSatish Balay     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
9892d61bbb3SSatish Balay     len = ai[i+1] - ai[i];
9902d61bbb3SSatish Balay     for (j=0; j<len; j++) {
9912d61bbb3SSatish Balay       rows[0] = (*aj++)*bs;
9922d61bbb3SSatish Balay       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
9932d61bbb3SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
9942d61bbb3SSatish Balay       array += bs2;
9952d61bbb3SSatish Balay     }
9962d61bbb3SSatish Balay   }
997606d414cSSatish Balay   ierr = PetscFree(rows);CHKERRQ(ierr);
998375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
999375fe846SBarry Smith   ierr = PetscFree(array);
1000375fe846SBarry Smith #endif
10012d61bbb3SSatish Balay 
10022d61bbb3SSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10032d61bbb3SSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10042d61bbb3SSatish Balay 
1005c4992f7dSBarry Smith   if (B) {
10062d61bbb3SSatish Balay     *B = C;
10072d61bbb3SSatish Balay   } else {
1008273d9f13SBarry Smith     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
10092d61bbb3SSatish Balay   }
10102d61bbb3SSatish Balay   PetscFunctionReturn(0);
10112d61bbb3SSatish Balay }
10122d61bbb3SSatish Balay 
10134a2ae208SSatish Balay #undef __FUNCT__
10144a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary"
10156849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
10162593348eSBarry Smith {
1017b6490206SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
10186849ba73SBarry Smith   PetscErrorCode ierr;
1019521d7252SBarry Smith   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1020b24ad042SBarry Smith   int            fd;
102187828ca2SBarry Smith   PetscScalar    *aa;
1022ce6f0cecSBarry Smith   FILE           *file;
10232593348eSBarry Smith 
10243a40ed3dSBarry Smith   PetscFunctionBegin;
1025b0a32e0cSBarry Smith   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1026c1ac3661SBarry Smith   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1027552e946dSBarry Smith   col_lens[0] = MAT_FILE_COOKIE;
10283b2fbd54SBarry Smith 
1029273d9f13SBarry Smith   col_lens[1] = A->m;
1030273d9f13SBarry Smith   col_lens[2] = A->n;
10317e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
10322593348eSBarry Smith 
10332593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
1034b6490206SBarry Smith   count = 0;
1035b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1036b6490206SBarry Smith     for (j=0; j<bs; j++) {
1037b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1038b6490206SBarry Smith     }
10392593348eSBarry Smith   }
10406f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1041606d414cSSatish Balay   ierr = PetscFree(col_lens);CHKERRQ(ierr);
10422593348eSBarry Smith 
10432593348eSBarry Smith   /* store column indices (zero start index) */
1044c1ac3661SBarry Smith   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1045b6490206SBarry Smith   count = 0;
1046b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1047b6490206SBarry Smith     for (j=0; j<bs; j++) {
1048b6490206SBarry Smith       for (k=a->i[i]; k<a->i[i+1]; k++) {
1049b6490206SBarry Smith         for (l=0; l<bs; l++) {
1050b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
10512593348eSBarry Smith         }
10522593348eSBarry Smith       }
1053b6490206SBarry Smith     }
1054b6490206SBarry Smith   }
10556f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1056606d414cSSatish Balay   ierr = PetscFree(jj);CHKERRQ(ierr);
10572593348eSBarry Smith 
10582593348eSBarry Smith   /* store nonzero values */
105987828ca2SBarry Smith   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1060b6490206SBarry Smith   count = 0;
1061b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1062b6490206SBarry Smith     for (j=0; j<bs; j++) {
1063b6490206SBarry Smith       for (k=a->i[i]; k<a->i[i+1]; k++) {
1064b6490206SBarry Smith         for (l=0; l<bs; l++) {
10657e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
1066b6490206SBarry Smith         }
1067b6490206SBarry Smith       }
1068b6490206SBarry Smith     }
1069b6490206SBarry Smith   }
10706f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1071606d414cSSatish Balay   ierr = PetscFree(aa);CHKERRQ(ierr);
1072ce6f0cecSBarry Smith 
1073b0a32e0cSBarry Smith   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1074ce6f0cecSBarry Smith   if (file) {
1075521d7252SBarry Smith     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1076ce6f0cecSBarry Smith   }
10773a40ed3dSBarry Smith   PetscFunctionReturn(0);
10782593348eSBarry Smith }
10792593348eSBarry Smith 
10804a2ae208SSatish Balay #undef __FUNCT__
10814a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
10826849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
10832593348eSBarry Smith {
1084b6490206SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1085dfbe8321SBarry Smith   PetscErrorCode    ierr;
1086521d7252SBarry Smith   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1087f3ef73ceSBarry Smith   PetscViewerFormat format;
10882593348eSBarry Smith 
10893a40ed3dSBarry Smith   PetscFunctionBegin;
1090b0a32e0cSBarry Smith   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1091456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
109277431f27SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1093fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1094bcd9e38bSBarry Smith     Mat aij;
1095ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1096bcd9e38bSBarry Smith     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1097bcd9e38bSBarry Smith     ierr = MatDestroy(aij);CHKERRQ(ierr);
109804929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
109904929863SHong Zhang      PetscFunctionReturn(0);
1100fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1101b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
110244cd7ae7SLois Curfman McInnes     for (i=0; i<a->mbs; i++) {
110344cd7ae7SLois Curfman McInnes       for (j=0; j<bs; j++) {
110477431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
110544cd7ae7SLois Curfman McInnes         for (k=a->i[i]; k<a->i[i+1]; k++) {
110644cd7ae7SLois Curfman McInnes           for (l=0; l<bs; l++) {
1107aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
11080e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
110977431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
11100e6d2581SBarry Smith                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11110e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
111277431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
11130e6d2581SBarry Smith                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11140e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
111577431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11160ef38995SBarry Smith             }
111744cd7ae7SLois Curfman McInnes #else
11180ef38995SBarry Smith             if (a->a[bs2*k + l*bs + j] != 0.0) {
111977431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
11200ef38995SBarry Smith             }
112144cd7ae7SLois Curfman McInnes #endif
112244cd7ae7SLois Curfman McInnes           }
112344cd7ae7SLois Curfman McInnes         }
1124b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
112544cd7ae7SLois Curfman McInnes       }
112644cd7ae7SLois Curfman McInnes     }
1127b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
11280ef38995SBarry Smith   } else {
1129b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1130b6490206SBarry Smith     for (i=0; i<a->mbs; i++) {
1131b6490206SBarry Smith       for (j=0; j<bs; j++) {
113277431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1133b6490206SBarry Smith         for (k=a->i[i]; k<a->i[i+1]; k++) {
1134b6490206SBarry Smith           for (l=0; l<bs; l++) {
1135aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
11360e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
113777431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
11380e6d2581SBarry Smith                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11390e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
114077431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
11410e6d2581SBarry Smith                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
11420ef38995SBarry Smith             } else {
114377431f27SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
114488685aaeSLois Curfman McInnes             }
114588685aaeSLois Curfman McInnes #else
114677431f27SBarry Smith             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
114788685aaeSLois Curfman McInnes #endif
11482593348eSBarry Smith           }
11492593348eSBarry Smith         }
1150b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
11512593348eSBarry Smith       }
11522593348eSBarry Smith     }
1153b0a32e0cSBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1154b6490206SBarry Smith   }
1155b0a32e0cSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
11563a40ed3dSBarry Smith   PetscFunctionReturn(0);
11572593348eSBarry Smith }
11582593348eSBarry Smith 
11594a2ae208SSatish Balay #undef __FUNCT__
11604a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
11616849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
11623270192aSSatish Balay {
116377ed5343SBarry Smith   Mat            A = (Mat) Aa;
11643270192aSSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
11656849ba73SBarry Smith   PetscErrorCode ierr;
1166521d7252SBarry Smith   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
11670e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
11683f1db9ecSBarry Smith   MatScalar      *aa;
1169b0a32e0cSBarry Smith   PetscViewer    viewer;
11703270192aSSatish Balay 
11713a40ed3dSBarry Smith   PetscFunctionBegin;
11723270192aSSatish Balay 
1173b65db4caSBarry Smith   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
117477ed5343SBarry Smith   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
117577ed5343SBarry Smith 
1176b0a32e0cSBarry Smith   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
117777ed5343SBarry Smith 
11783270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1179b0a32e0cSBarry Smith   color = PETSC_DRAW_BLUE;
11803270192aSSatish Balay   for (i=0,row=0; i<mbs; i++,row+=bs) {
11813270192aSSatish Balay     for (j=a->i[i]; j<a->i[i+1]; j++) {
1182273d9f13SBarry Smith       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
11833270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
11843270192aSSatish Balay       aa = a->a + j*bs2;
11853270192aSSatish Balay       for (k=0; k<bs; k++) {
11863270192aSSatish Balay         for (l=0; l<bs; l++) {
11870e6d2581SBarry Smith           if (PetscRealPart(*aa++) >=  0.) continue;
1188b0a32e0cSBarry Smith           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
11893270192aSSatish Balay         }
11903270192aSSatish Balay       }
11913270192aSSatish Balay     }
11923270192aSSatish Balay   }
1193b0a32e0cSBarry Smith   color = PETSC_DRAW_CYAN;
11943270192aSSatish Balay   for (i=0,row=0; i<mbs; i++,row+=bs) {
11953270192aSSatish Balay     for (j=a->i[i]; j<a->i[i+1]; j++) {
1196273d9f13SBarry Smith       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
11973270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
11983270192aSSatish Balay       aa = a->a + j*bs2;
11993270192aSSatish Balay       for (k=0; k<bs; k++) {
12003270192aSSatish Balay         for (l=0; l<bs; l++) {
12010e6d2581SBarry Smith           if (PetscRealPart(*aa++) != 0.) continue;
1202b0a32e0cSBarry Smith           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
12033270192aSSatish Balay         }
12043270192aSSatish Balay       }
12053270192aSSatish Balay     }
12063270192aSSatish Balay   }
12073270192aSSatish Balay 
1208b0a32e0cSBarry Smith   color = PETSC_DRAW_RED;
12093270192aSSatish Balay   for (i=0,row=0; i<mbs; i++,row+=bs) {
12103270192aSSatish Balay     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211273d9f13SBarry Smith       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
12123270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
12133270192aSSatish Balay       aa = a->a + j*bs2;
12143270192aSSatish Balay       for (k=0; k<bs; k++) {
12153270192aSSatish Balay         for (l=0; l<bs; l++) {
12160e6d2581SBarry Smith           if (PetscRealPart(*aa++) <= 0.) continue;
1217b0a32e0cSBarry Smith           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
12183270192aSSatish Balay         }
12193270192aSSatish Balay       }
12203270192aSSatish Balay     }
12213270192aSSatish Balay   }
122277ed5343SBarry Smith   PetscFunctionReturn(0);
122377ed5343SBarry Smith }
12243270192aSSatish Balay 
12254a2ae208SSatish Balay #undef __FUNCT__
12264a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw"
12276849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
122877ed5343SBarry Smith {
1229dfbe8321SBarry Smith   PetscErrorCode ierr;
12300e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,w,h;
1231b0a32e0cSBarry Smith   PetscDraw      draw;
123277ed5343SBarry Smith   PetscTruth     isnull;
12333270192aSSatish Balay 
123477ed5343SBarry Smith   PetscFunctionBegin;
123577ed5343SBarry Smith 
1236b0a32e0cSBarry Smith   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1237b0a32e0cSBarry Smith   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
123877ed5343SBarry Smith 
123977ed5343SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1240273d9f13SBarry Smith   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
124177ed5343SBarry Smith   xr += w;    yr += h;  xl = -w;     yl = -h;
1242b0a32e0cSBarry Smith   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1243b0a32e0cSBarry Smith   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
124477ed5343SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
12453a40ed3dSBarry Smith   PetscFunctionReturn(0);
12463270192aSSatish Balay }
12473270192aSSatish Balay 
12484a2ae208SSatish Balay #undef __FUNCT__
12494a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ"
1250dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
12512593348eSBarry Smith {
1252dfbe8321SBarry Smith   PetscErrorCode ierr;
125332077d6dSBarry Smith   PetscTruth     iascii,isbinary,isdraw;
12542593348eSBarry Smith 
12553a40ed3dSBarry Smith   PetscFunctionBegin;
125632077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1257fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1258fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
125932077d6dSBarry Smith   if (iascii){
12603a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
12610f5bd95cSBarry Smith   } else if (isbinary) {
12623a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
12630f5bd95cSBarry Smith   } else if (isdraw) {
12643a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
12655cd90555SBarry Smith   } else {
1266a5e6ed63SBarry Smith     Mat B;
1267ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1268a5e6ed63SBarry Smith     ierr = MatView(B,viewer);CHKERRQ(ierr);
1269a5e6ed63SBarry Smith     ierr = MatDestroy(B);CHKERRQ(ierr);
12702593348eSBarry Smith   }
12713a40ed3dSBarry Smith   PetscFunctionReturn(0);
12722593348eSBarry Smith }
1273b6490206SBarry Smith 
1274cd0e1443SSatish Balay 
12754a2ae208SSatish Balay #undef __FUNCT__
12764a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ"
1277c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1278cd0e1443SSatish Balay {
1279cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1280c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1281c1ac3661SBarry Smith   PetscInt    *ai = a->i,*ailen = a->ilen;
1282521d7252SBarry Smith   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
12833f1db9ecSBarry Smith   MatScalar   *ap,*aa = a->a,zero = 0.0;
1284cd0e1443SSatish Balay 
12853a40ed3dSBarry Smith   PetscFunctionBegin;
12862d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over rows */
1287cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
128829bbc08cSBarry Smith     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
128977431f27SBarry Smith     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
12902d61bbb3SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
12912c3acbe9SBarry Smith     nrow = ailen[brow];
12922d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over columns */
129329bbc08cSBarry Smith       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
129477431f27SBarry Smith       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
12952d61bbb3SSatish Balay       col  = in[l] ;
12962d61bbb3SSatish Balay       bcol = col/bs;
12972d61bbb3SSatish Balay       cidx = col%bs;
12982d61bbb3SSatish Balay       ridx = row%bs;
12992d61bbb3SSatish Balay       high = nrow;
13002d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
13012d61bbb3SSatish Balay       while (high-low > 5) {
1302cd0e1443SSatish Balay         t = (low+high)/2;
1303cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
1304cd0e1443SSatish Balay         else             low  = t;
1305cd0e1443SSatish Balay       }
1306cd0e1443SSatish Balay       for (i=low; i<high; i++) {
1307cd0e1443SSatish Balay         if (rp[i] > bcol) break;
1308cd0e1443SSatish Balay         if (rp[i] == bcol) {
13092d61bbb3SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
13102d61bbb3SSatish Balay           goto finished;
1311cd0e1443SSatish Balay         }
1312cd0e1443SSatish Balay       }
13132d61bbb3SSatish Balay       *v++ = zero;
13142d61bbb3SSatish Balay       finished:;
1315cd0e1443SSatish Balay     }
1316cd0e1443SSatish Balay   }
13173a40ed3dSBarry Smith   PetscFunctionReturn(0);
1318cd0e1443SSatish Balay }
1319cd0e1443SSatish Balay 
132095d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
13214a2ae208SSatish Balay #undef __FUNCT__
13224a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1323c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
132495d5f7c2SBarry Smith {
1325563b5814SBarry Smith   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1326dfbe8321SBarry Smith   PetscErrorCode ierr;
1327c1ac3661SBarry Smith   PetscInt       i,N = m*n*b->bs2;
1328563b5814SBarry Smith   MatScalar      *vsingle;
132995d5f7c2SBarry Smith 
133095d5f7c2SBarry Smith   PetscFunctionBegin;
1331563b5814SBarry Smith   if (N > b->setvalueslen) {
1332563b5814SBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1333b0a32e0cSBarry Smith     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1334563b5814SBarry Smith     b->setvalueslen  = N;
1335563b5814SBarry Smith   }
1336563b5814SBarry Smith   vsingle = b->setvaluescopy;
133795d5f7c2SBarry Smith   for (i=0; i<N; i++) {
133895d5f7c2SBarry Smith     vsingle[i] = v[i];
133995d5f7c2SBarry Smith   }
134095d5f7c2SBarry Smith   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
134195d5f7c2SBarry Smith   PetscFunctionReturn(0);
134295d5f7c2SBarry Smith }
134395d5f7c2SBarry Smith #endif
134495d5f7c2SBarry Smith 
13452d61bbb3SSatish Balay 
13464a2ae208SSatish Balay #undef __FUNCT__
13474a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1348c1ac3661SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
134992c4ed94SBarry Smith {
135092c4ed94SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1351e2ee6c50SBarry Smith   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1352c1ac3661SBarry Smith   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
13536849ba73SBarry Smith   PetscErrorCode  ierr;
1354521d7252SBarry Smith   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1355273d9f13SBarry Smith   PetscTruth      roworiented=a->roworiented;
1356f15d580aSBarry Smith   const MatScalar *value = v;
1357f15d580aSBarry Smith   MatScalar       *ap,*aa = a->a,*bap;
135892c4ed94SBarry Smith 
13593a40ed3dSBarry Smith   PetscFunctionBegin;
13600e324ae4SSatish Balay   if (roworiented) {
13610e324ae4SSatish Balay     stepval = (n-1)*bs;
13620e324ae4SSatish Balay   } else {
13630e324ae4SSatish Balay     stepval = (m-1)*bs;
13640e324ae4SSatish Balay   }
136592c4ed94SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
136692c4ed94SBarry Smith     row  = im[k];
13675ef9f2a5SBarry Smith     if (row < 0) continue;
13682515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
136977431f27SBarry Smith     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
137092c4ed94SBarry Smith #endif
137192c4ed94SBarry Smith     rp   = aj + ai[row];
137292c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
137392c4ed94SBarry Smith     rmax = imax[row];
137492c4ed94SBarry Smith     nrow = ailen[row];
137592c4ed94SBarry Smith     low  = 0;
1376c71e6ed7SBarry Smith     high = nrow;
137792c4ed94SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
13785ef9f2a5SBarry Smith       if (in[l] < 0) continue;
13792515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
138077431f27SBarry Smith       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
138192c4ed94SBarry Smith #endif
138292c4ed94SBarry Smith       col = in[l];
138392c4ed94SBarry Smith       if (roworiented) {
13840e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
13850e324ae4SSatish Balay       } else {
13860e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
138792c4ed94SBarry Smith       }
1388c71e6ed7SBarry Smith       if (col < lastcol) low = 0; else high = nrow;
1389e2ee6c50SBarry Smith       lastcol = col;
139092c4ed94SBarry Smith       while (high-low > 7) {
139192c4ed94SBarry Smith         t = (low+high)/2;
139292c4ed94SBarry Smith         if (rp[t] > col) high = t;
139392c4ed94SBarry Smith         else             low  = t;
139492c4ed94SBarry Smith       }
139592c4ed94SBarry Smith       for (i=low; i<high; i++) {
139692c4ed94SBarry Smith         if (rp[i] > col) break;
139792c4ed94SBarry Smith         if (rp[i] == col) {
13988a84c255SSatish Balay           bap  = ap +  bs2*i;
13990e324ae4SSatish Balay           if (roworiented) {
14008a84c255SSatish Balay             if (is == ADD_VALUES) {
1401dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1402dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
14038a84c255SSatish Balay                   bap[jj] += *value++;
1404dd9472c6SBarry Smith                 }
1405dd9472c6SBarry Smith               }
14060e324ae4SSatish Balay             } else {
1407dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1408dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
14090e324ae4SSatish Balay                   bap[jj] = *value++;
14108a84c255SSatish Balay                 }
1411dd9472c6SBarry Smith               }
1412dd9472c6SBarry Smith             }
14130e324ae4SSatish Balay           } else {
14140e324ae4SSatish Balay             if (is == ADD_VALUES) {
1415dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1416dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
14170e324ae4SSatish Balay                   *bap++ += *value++;
1418dd9472c6SBarry Smith                 }
1419dd9472c6SBarry Smith               }
14200e324ae4SSatish Balay             } else {
1421dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1422dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
14230e324ae4SSatish Balay                   *bap++  = *value++;
14240e324ae4SSatish Balay                 }
14258a84c255SSatish Balay               }
1426dd9472c6SBarry Smith             }
1427dd9472c6SBarry Smith           }
1428f1241b54SBarry Smith           goto noinsert2;
142992c4ed94SBarry Smith         }
143092c4ed94SBarry Smith       }
143189280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
143277431f27SBarry Smith       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
143392c4ed94SBarry Smith       if (nrow >= rmax) {
143492c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
1435c1ac3661SBarry Smith         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
14363f1db9ecSBarry Smith         MatScalar *new_a;
143792c4ed94SBarry Smith 
143877431f27SBarry Smith         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
143989280ab3SLois Curfman McInnes 
144092c4ed94SBarry Smith         /* malloc new storage space */
1441c1ac3661SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1442b0a32e0cSBarry Smith 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1443690b6cddSBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz);
144492c4ed94SBarry Smith         new_i   = new_j + new_nz;
144592c4ed94SBarry Smith 
144692c4ed94SBarry Smith         /* copy over old data into new slots */
144792c4ed94SBarry Smith         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
144892c4ed94SBarry Smith         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1449c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
145092c4ed94SBarry Smith         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1451c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1452549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1453549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1454549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
145592c4ed94SBarry Smith         /* free up old matrix storage */
1456606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);
1457606d414cSSatish Balay         if (!a->singlemalloc) {
1458606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr);
1459606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);
1460606d414cSSatish Balay         }
146192c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1462c4992f7dSBarry Smith         a->singlemalloc = PETSC_TRUE;
146392c4ed94SBarry Smith 
146492c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
146592c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
146652e6d16bSBarry Smith         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
146792c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
146892c4ed94SBarry Smith         a->reallocs++;
146992c4ed94SBarry Smith         a->nz++;
147092c4ed94SBarry Smith       }
147192c4ed94SBarry Smith       N = nrow++ - 1;
147292c4ed94SBarry Smith       /* shift up all the later entries in this row */
147392c4ed94SBarry Smith       for (ii=N; ii>=i; ii--) {
147492c4ed94SBarry Smith         rp[ii+1] = rp[ii];
1475549d3d68SSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
147692c4ed94SBarry Smith       }
1477549d3d68SSatish Balay       if (N >= i) {
1478549d3d68SSatish Balay         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1479549d3d68SSatish Balay       }
148092c4ed94SBarry Smith       rp[i] = col;
14818a84c255SSatish Balay       bap   = ap +  bs2*i;
14820e324ae4SSatish Balay       if (roworiented) {
1483dd9472c6SBarry Smith         for (ii=0; ii<bs; ii++,value+=stepval) {
1484dd9472c6SBarry Smith           for (jj=ii; jj<bs2; jj+=bs) {
14850e324ae4SSatish Balay             bap[jj] = *value++;
1486dd9472c6SBarry Smith           }
1487dd9472c6SBarry Smith         }
14880e324ae4SSatish Balay       } else {
1489dd9472c6SBarry Smith         for (ii=0; ii<bs; ii++,value+=stepval) {
1490dd9472c6SBarry Smith           for (jj=0; jj<bs; jj++) {
14910e324ae4SSatish Balay             *bap++  = *value++;
14920e324ae4SSatish Balay           }
1493dd9472c6SBarry Smith         }
1494dd9472c6SBarry Smith       }
1495f1241b54SBarry Smith       noinsert2:;
149692c4ed94SBarry Smith       low = i;
149792c4ed94SBarry Smith     }
149892c4ed94SBarry Smith     ailen[row] = nrow;
149992c4ed94SBarry Smith   }
15003a40ed3dSBarry Smith   PetscFunctionReturn(0);
150192c4ed94SBarry Smith }
150226e093fcSHong Zhang 
15034a2ae208SSatish Balay #undef __FUNCT__
15044a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1505dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1506584200bdSSatish Balay {
1507584200bdSSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1508c1ac3661SBarry Smith   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1509c1ac3661SBarry Smith   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
15106849ba73SBarry Smith   PetscErrorCode ierr;
1511c1ac3661SBarry Smith   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
15123f1db9ecSBarry Smith   MatScalar      *aa = a->a,*ap;
15133447b6efSHong Zhang   PetscReal      ratio=0.6;
1514584200bdSSatish Balay 
15153a40ed3dSBarry Smith   PetscFunctionBegin;
15163a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1517584200bdSSatish Balay 
151843ee02c3SBarry Smith   if (m) rmax = ailen[0];
1519584200bdSSatish Balay   for (i=1; i<mbs; i++) {
1520584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
1521584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
1522d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
1523584200bdSSatish Balay     if (fshift) {
1524a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1525584200bdSSatish Balay       N = ailen[i];
1526584200bdSSatish Balay       for (j=0; j<N; j++) {
1527584200bdSSatish Balay         ip[j-fshift] = ip[j];
1528549d3d68SSatish Balay         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1529584200bdSSatish Balay       }
1530584200bdSSatish Balay     }
1531584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
1532584200bdSSatish Balay   }
1533584200bdSSatish Balay   if (mbs) {
1534584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
1535584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1536584200bdSSatish Balay   }
1537584200bdSSatish Balay   /* reset ilen and imax for each row */
1538584200bdSSatish Balay   for (i=0; i<mbs; i++) {
1539584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
1540584200bdSSatish Balay   }
1541a7c10996SSatish Balay   a->nz = ai[mbs];
1542584200bdSSatish Balay 
1543584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
1544b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
1545584200bdSSatish Balay   if (fshift && a->diag) {
1546606d414cSSatish Balay     ierr = PetscFree(a->diag);CHKERRQ(ierr);
154752e6d16bSBarry Smith     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1548584200bdSSatish Balay     a->diag = 0;
1549584200bdSSatish Balay   }
155063ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
155163ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
155263ba0a88SBarry Smith   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1553e2f3b5e9SSatish Balay   a->reallocs          = 0;
15540e6d2581SBarry Smith   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1555cf4441caSHong Zhang 
1556cb5d8e9eSHong Zhang   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
15572f53aa61SHong Zhang   if (a->compressedrow.use){
1558317fbc4cSHong Zhang     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
155973e7a558SHong Zhang   }
156088e51ccdSHong Zhang 
156188e51ccdSHong Zhang   A->same_nonzero = PETSC_TRUE;
15623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1563584200bdSSatish Balay }
1564584200bdSSatish Balay 
1565bea157c4SSatish Balay /*
1566bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
1567bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1568bea157c4SSatish Balay    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1569bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
1570bea157c4SSatish Balay */
15714a2ae208SSatish Balay #undef __FUNCT__
15724a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1573c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1574d9b7c43dSSatish Balay {
1575c1ac3661SBarry Smith   PetscInt   i,j,k,row;
1576bea157c4SSatish Balay   PetscTruth flg;
15773a40ed3dSBarry Smith 
1578433994e6SBarry Smith   PetscFunctionBegin;
1579bea157c4SSatish Balay   for (i=0,j=0; i<n; j++) {
1580bea157c4SSatish Balay     row = idx[i];
1581bea157c4SSatish Balay     if (row%bs!=0) { /* Not the begining of a block */
1582bea157c4SSatish Balay       sizes[j] = 1;
1583bea157c4SSatish Balay       i++;
1584e4fda26cSSatish Balay     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1585bea157c4SSatish Balay       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1586bea157c4SSatish Balay       i++;
1587bea157c4SSatish Balay     } else { /* Begining of the block, so check if the complete block exists */
1588bea157c4SSatish Balay       flg = PETSC_TRUE;
1589bea157c4SSatish Balay       for (k=1; k<bs; k++) {
1590bea157c4SSatish Balay         if (row+k != idx[i+k]) { /* break in the block */
1591bea157c4SSatish Balay           flg = PETSC_FALSE;
1592bea157c4SSatish Balay           break;
1593d9b7c43dSSatish Balay         }
1594bea157c4SSatish Balay       }
1595abc0a331SBarry Smith       if (flg) { /* No break in the bs */
1596bea157c4SSatish Balay         sizes[j] = bs;
1597bea157c4SSatish Balay         i+= bs;
1598bea157c4SSatish Balay       } else {
1599bea157c4SSatish Balay         sizes[j] = 1;
1600bea157c4SSatish Balay         i++;
1601bea157c4SSatish Balay       }
1602bea157c4SSatish Balay     }
1603bea157c4SSatish Balay   }
1604bea157c4SSatish Balay   *bs_max = j;
16053a40ed3dSBarry Smith   PetscFunctionReturn(0);
1606d9b7c43dSSatish Balay }
1607d9b7c43dSSatish Balay 
16084a2ae208SSatish Balay #undef __FUNCT__
16094a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1610dfbe8321SBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1611d9b7c43dSSatish Balay {
1612d9b7c43dSSatish Balay   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1613dfbe8321SBarry Smith   PetscErrorCode ierr;
1614c1ac3661SBarry Smith   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1615521d7252SBarry Smith   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
161687828ca2SBarry Smith   PetscScalar    zero = 0.0;
16173f1db9ecSBarry Smith   MatScalar      *aa;
1618d9b7c43dSSatish Balay 
16193a40ed3dSBarry Smith   PetscFunctionBegin;
1620d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1621b9b97703SBarry Smith   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1622d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1623d9b7c43dSSatish Balay 
1624bea157c4SSatish Balay   /* allocate memory for rows,sizes */
1625c1ac3661SBarry Smith   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1626bea157c4SSatish Balay   sizes = rows + is_n;
1627bea157c4SSatish Balay 
1628563b5814SBarry Smith   /* copy IS values to rows, and sort them */
1629bea157c4SSatish Balay   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1630bea157c4SSatish Balay   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1631dffd3267SBarry Smith   if (baij->keepzeroedrows) {
1632dffd3267SBarry Smith     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1633dffd3267SBarry Smith     bs_max = is_n;
163488e51ccdSHong Zhang     A->same_nonzero = PETSC_TRUE;
1635dffd3267SBarry Smith   } else {
1636bea157c4SSatish Balay     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
163788e51ccdSHong Zhang     A->same_nonzero = PETSC_FALSE;
1638dffd3267SBarry Smith   }
1639b97a56abSSatish Balay   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1640bea157c4SSatish Balay 
1641bea157c4SSatish Balay   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1642bea157c4SSatish Balay     row   = rows[j];
164377431f27SBarry Smith     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1644bea157c4SSatish Balay     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1645bea157c4SSatish Balay     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1646dffd3267SBarry Smith     if (sizes[i] == bs && !baij->keepzeroedrows) {
1647bea157c4SSatish Balay       if (diag) {
1648bea157c4SSatish Balay         if (baij->ilen[row/bs] > 0) {
1649bea157c4SSatish Balay           baij->ilen[row/bs]       = 1;
1650bea157c4SSatish Balay           baij->j[baij->i[row/bs]] = row/bs;
1651bea157c4SSatish Balay           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1652a07cd24cSSatish Balay         }
1653563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
1654bea157c4SSatish Balay         for (k=0; k<bs; k++) {
1655bea157c4SSatish Balay           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1656bea157c4SSatish Balay         }
1657bea157c4SSatish Balay       } else { /* (!diag) */
1658bea157c4SSatish Balay         baij->ilen[row/bs] = 0;
1659bea157c4SSatish Balay       } /* end (!diag) */
1660bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
1661aa482453SBarry Smith #if defined (PETSC_USE_DEBUG)
1662634064b4SBarry Smith       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1663bea157c4SSatish Balay #endif
1664bea157c4SSatish Balay       for (k=0; k<count; k++) {
1665d9b7c43dSSatish Balay         aa[0] =  zero;
1666d9b7c43dSSatish Balay         aa    += bs;
1667d9b7c43dSSatish Balay       }
1668d9b7c43dSSatish Balay       if (diag) {
1669f830108cSBarry Smith         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1670d9b7c43dSSatish Balay       }
1671d9b7c43dSSatish Balay     }
1672bea157c4SSatish Balay   }
1673bea157c4SSatish Balay 
1674606d414cSSatish Balay   ierr = PetscFree(rows);CHKERRQ(ierr);
16759a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1677d9b7c43dSSatish Balay }
16781c351548SSatish Balay 
16794a2ae208SSatish Balay #undef __FUNCT__
16804a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ"
1681c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
16822d61bbb3SSatish Balay {
16832d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1684e2ee6c50SBarry Smith   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1685c1ac3661SBarry Smith   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1686521d7252SBarry Smith   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
16876849ba73SBarry Smith   PetscErrorCode ierr;
1688c1ac3661SBarry Smith   PetscInt       ridx,cidx,bs2=a->bs2;
1689273d9f13SBarry Smith   PetscTruth     roworiented=a->roworiented;
16903f1db9ecSBarry Smith   MatScalar      *ap,value,*aa=a->a,*bap;
16912d61bbb3SSatish Balay 
16922d61bbb3SSatish Balay   PetscFunctionBegin;
16932d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over added rows */
16942d61bbb3SSatish Balay     row  = im[k]; brow = row/bs;
16955ef9f2a5SBarry Smith     if (row < 0) continue;
16962515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
169777431f27SBarry Smith     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
16982d61bbb3SSatish Balay #endif
16992d61bbb3SSatish Balay     rp   = aj + ai[brow];
17002d61bbb3SSatish Balay     ap   = aa + bs2*ai[brow];
17012d61bbb3SSatish Balay     rmax = imax[brow];
17022d61bbb3SSatish Balay     nrow = ailen[brow];
17032d61bbb3SSatish Balay     low  = 0;
1704c71e6ed7SBarry Smith     high = nrow;
17052d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over added columns */
17065ef9f2a5SBarry Smith       if (in[l] < 0) continue;
17072515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
170877431f27SBarry Smith       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
17092d61bbb3SSatish Balay #endif
17102d61bbb3SSatish Balay       col = in[l]; bcol = col/bs;
17112d61bbb3SSatish Balay       ridx = row % bs; cidx = col % bs;
17122d61bbb3SSatish Balay       if (roworiented) {
17135ef9f2a5SBarry Smith         value = v[l + k*n];
17142d61bbb3SSatish Balay       } else {
17152d61bbb3SSatish Balay         value = v[k + l*m];
17162d61bbb3SSatish Balay       }
1717c71e6ed7SBarry Smith       if (col < lastcol) low = 0; else high = nrow;
1718e2ee6c50SBarry Smith       lastcol = col;
17192d61bbb3SSatish Balay       while (high-low > 7) {
17202d61bbb3SSatish Balay         t = (low+high)/2;
17212d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
17222d61bbb3SSatish Balay         else              low  = t;
17232d61bbb3SSatish Balay       }
17242d61bbb3SSatish Balay       for (i=low; i<high; i++) {
17252d61bbb3SSatish Balay         if (rp[i] > bcol) break;
17262d61bbb3SSatish Balay         if (rp[i] == bcol) {
17272d61bbb3SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
17282d61bbb3SSatish Balay           if (is == ADD_VALUES) *bap += value;
17292d61bbb3SSatish Balay           else                  *bap  = value;
17302d61bbb3SSatish Balay           goto noinsert1;
17312d61bbb3SSatish Balay         }
17322d61bbb3SSatish Balay       }
17332d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
173477431f27SBarry Smith       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
17352d61bbb3SSatish Balay       if (nrow >= rmax) {
17362d61bbb3SSatish Balay         /* there is no extra room in row, therefore enlarge */
1737c1ac3661SBarry Smith         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
17383f1db9ecSBarry Smith         MatScalar *new_a;
17392d61bbb3SSatish Balay 
174077431f27SBarry Smith         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
17412d61bbb3SSatish Balay 
17422d61bbb3SSatish Balay         /* Malloc new storage space */
1743c1ac3661SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1744b0a32e0cSBarry Smith 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1745c1ac3661SBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz);
17462d61bbb3SSatish Balay         new_i   = new_j + new_nz;
17472d61bbb3SSatish Balay 
17482d61bbb3SSatish Balay         /* copy over old data into new slots */
17492d61bbb3SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
17502d61bbb3SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1751c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
17522d61bbb3SSatish Balay         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1753c1ac3661SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1754549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1755549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1756549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
17572d61bbb3SSatish Balay         /* free up old matrix storage */
1758606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);
1759606d414cSSatish Balay         if (!a->singlemalloc) {
1760606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr);
1761606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);
1762606d414cSSatish Balay         }
17632d61bbb3SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1764c4992f7dSBarry Smith         a->singlemalloc = PETSC_TRUE;
17652d61bbb3SSatish Balay 
17662d61bbb3SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
17672d61bbb3SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
176852e6d16bSBarry Smith         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
17692d61bbb3SSatish Balay         a->maxnz += bs2*CHUNKSIZE;
17702d61bbb3SSatish Balay         a->reallocs++;
17712d61bbb3SSatish Balay         a->nz++;
17722d61bbb3SSatish Balay       }
17732d61bbb3SSatish Balay       N = nrow++ - 1;
17742d61bbb3SSatish Balay       /* shift up all the later entries in this row */
17752d61bbb3SSatish Balay       for (ii=N; ii>=i; ii--) {
17762d61bbb3SSatish Balay         rp[ii+1] = rp[ii];
1777549d3d68SSatish Balay         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
17782d61bbb3SSatish Balay       }
1779549d3d68SSatish Balay       if (N>=i) {
1780549d3d68SSatish Balay         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1781549d3d68SSatish Balay       }
17822d61bbb3SSatish Balay       rp[i]                      = bcol;
17832d61bbb3SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
17842d61bbb3SSatish Balay       noinsert1:;
17852d61bbb3SSatish Balay       low = i;
17862d61bbb3SSatish Balay     }
17872d61bbb3SSatish Balay     ailen[brow] = nrow;
17882d61bbb3SSatish Balay   }
178988e51ccdSHong Zhang   A->same_nonzero = PETSC_FALSE;
17902d61bbb3SSatish Balay   PetscFunctionReturn(0);
17912d61bbb3SSatish Balay }
17922d61bbb3SSatish Balay 
17932d61bbb3SSatish Balay 
17944a2ae208SSatish Balay #undef __FUNCT__
17954a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1796dfbe8321SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
17972d61bbb3SSatish Balay {
17982d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
17992d61bbb3SSatish Balay   Mat            outA;
1800dfbe8321SBarry Smith   PetscErrorCode ierr;
1801667159a5SBarry Smith   PetscTruth     row_identity,col_identity;
18022d61bbb3SSatish Balay 
18032d61bbb3SSatish Balay   PetscFunctionBegin;
1804d3d32019SBarry Smith   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1805667159a5SBarry Smith   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1806667159a5SBarry Smith   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1807667159a5SBarry Smith   if (!row_identity || !col_identity) {
1808634064b4SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1809667159a5SBarry Smith   }
18102d61bbb3SSatish Balay 
18112d61bbb3SSatish Balay   outA          = inA;
18122d61bbb3SSatish Balay   inA->factor   = FACTOR_LU;
18132d61bbb3SSatish Balay 
18142d61bbb3SSatish Balay   if (!a->diag) {
1815c4992f7dSBarry Smith     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
18162d61bbb3SSatish Balay   }
1817cf242676SKris Buschelman 
1818c38d4ed2SBarry Smith   a->row        = row;
1819c38d4ed2SBarry Smith   a->col        = col;
1820c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1821c38d4ed2SBarry Smith   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1822c38d4ed2SBarry Smith 
1823c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
18244c49b128SBarry Smith   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
182552e6d16bSBarry Smith   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1826c38d4ed2SBarry Smith 
1827cf242676SKris Buschelman   /*
1828cf242676SKris Buschelman       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1829cf242676SKris Buschelman       for ILU(0) factorization with natural ordering
1830cf242676SKris Buschelman   */
1831521d7252SBarry Smith   if (inA->bs < 8) {
1832cf242676SKris Buschelman     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1833cf242676SKris Buschelman   } else {
1834c38d4ed2SBarry Smith     if (!a->solve_work) {
1835521d7252SBarry Smith       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
183652e6d16bSBarry Smith       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1837c38d4ed2SBarry Smith     }
18382d61bbb3SSatish Balay   }
18392d61bbb3SSatish Balay 
1840af281ebdSHong Zhang   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1841667159a5SBarry Smith 
18422d61bbb3SSatish Balay   PetscFunctionReturn(0);
18432d61bbb3SSatish Balay }
18444a2ae208SSatish Balay #undef __FUNCT__
18454a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1846dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1847ba4ca20aSSatish Balay {
1848c38d4ed2SBarry Smith   static PetscTruth called = PETSC_FALSE;
1849ba4ca20aSSatish Balay   MPI_Comm          comm = A->comm;
1850dfbe8321SBarry Smith   PetscErrorCode    ierr;
1851ba4ca20aSSatish Balay 
18523a40ed3dSBarry Smith   PetscFunctionBegin;
1853c38d4ed2SBarry Smith   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1854d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1855d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
18563a40ed3dSBarry Smith   PetscFunctionReturn(0);
1857ba4ca20aSSatish Balay }
1858d9b7c43dSSatish Balay 
1859fb2e594dSBarry Smith EXTERN_C_BEGIN
18604a2ae208SSatish Balay #undef __FUNCT__
18614a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1862*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
186327a8da17SBarry Smith {
186427a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1865c1ac3661SBarry Smith   PetscInt    i,nz,nbs;
186627a8da17SBarry Smith 
186727a8da17SBarry Smith   PetscFunctionBegin;
186814db4f74SSatish Balay   nz  = baij->maxnz/baij->bs2;
186914db4f74SSatish Balay   nbs = baij->nbs;
187027a8da17SBarry Smith   for (i=0; i<nz; i++) {
187127a8da17SBarry Smith     baij->j[i] = indices[i];
187227a8da17SBarry Smith   }
187327a8da17SBarry Smith   baij->nz = nz;
187414db4f74SSatish Balay   for (i=0; i<nbs; i++) {
187527a8da17SBarry Smith     baij->ilen[i] = baij->imax[i];
187627a8da17SBarry Smith   }
187727a8da17SBarry Smith 
187827a8da17SBarry Smith   PetscFunctionReturn(0);
187927a8da17SBarry Smith }
1880fb2e594dSBarry Smith EXTERN_C_END
188127a8da17SBarry Smith 
18824a2ae208SSatish Balay #undef __FUNCT__
18834a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
188427a8da17SBarry Smith /*@
188527a8da17SBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
188627a8da17SBarry Smith        in the matrix.
188727a8da17SBarry Smith 
188827a8da17SBarry Smith   Input Parameters:
188927a8da17SBarry Smith +  mat - the SeqBAIJ matrix
189027a8da17SBarry Smith -  indices - the column indices
189127a8da17SBarry Smith 
189215091d37SBarry Smith   Level: advanced
189315091d37SBarry Smith 
189427a8da17SBarry Smith   Notes:
189527a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
189627a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
189727a8da17SBarry Smith   of the MatSetValues() operation.
189827a8da17SBarry Smith 
189927a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
1900d1be2dadSMatthew Knepley   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
190127a8da17SBarry Smith 
190227a8da17SBarry Smith     MUST be called before any calls to MatSetValues();
190327a8da17SBarry Smith 
190427a8da17SBarry Smith @*/
1905*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
190627a8da17SBarry Smith {
1907c1ac3661SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
190827a8da17SBarry Smith 
190927a8da17SBarry Smith   PetscFunctionBegin;
19104482741eSBarry Smith   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
19114482741eSBarry Smith   PetscValidPointer(indices,2);
1912c134de8dSSatish Balay   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
191327a8da17SBarry Smith   if (f) {
191427a8da17SBarry Smith     ierr = (*f)(mat,indices);CHKERRQ(ierr);
191527a8da17SBarry Smith   } else {
1916634064b4SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
191727a8da17SBarry Smith   }
191827a8da17SBarry Smith   PetscFunctionReturn(0);
191927a8da17SBarry Smith }
192027a8da17SBarry Smith 
19214a2ae208SSatish Balay #undef __FUNCT__
19224a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1923dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1924273d9f13SBarry Smith {
1925273d9f13SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1926dfbe8321SBarry Smith   PetscErrorCode ierr;
1927c1ac3661SBarry Smith   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1928273d9f13SBarry Smith   PetscReal      atmp;
192987828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
1930273d9f13SBarry Smith   MatScalar      *aa;
1931c1ac3661SBarry Smith   PetscInt       ncols,brow,krow,kcol;
1932273d9f13SBarry Smith 
1933273d9f13SBarry Smith   PetscFunctionBegin;
1934273d9f13SBarry Smith   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1935521d7252SBarry Smith   bs   = A->bs;
1936273d9f13SBarry Smith   aa   = a->a;
1937273d9f13SBarry Smith   ai   = a->i;
1938273d9f13SBarry Smith   aj   = a->j;
1939273d9f13SBarry Smith   mbs = a->mbs;
1940273d9f13SBarry Smith 
1941273d9f13SBarry Smith   ierr = VecSet(&zero,v);CHKERRQ(ierr);
19421ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1943273d9f13SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1944273d9f13SBarry Smith   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1945273d9f13SBarry Smith   for (i=0; i<mbs; i++) {
1946273d9f13SBarry Smith     ncols = ai[1] - ai[0]; ai++;
1947273d9f13SBarry Smith     brow  = bs*i;
1948273d9f13SBarry Smith     for (j=0; j<ncols; j++){
1949273d9f13SBarry Smith       /* bcol = bs*(*aj); */
1950273d9f13SBarry Smith       for (kcol=0; kcol<bs; kcol++){
1951273d9f13SBarry Smith         for (krow=0; krow<bs; krow++){
1952273d9f13SBarry Smith           atmp = PetscAbsScalar(*aa); aa++;
1953273d9f13SBarry Smith           row = brow + krow;    /* row index */
1954273d9f13SBarry Smith           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1955273d9f13SBarry Smith           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1956273d9f13SBarry Smith         }
1957273d9f13SBarry Smith       }
1958273d9f13SBarry Smith       aj++;
1959273d9f13SBarry Smith     }
1960273d9f13SBarry Smith   }
19611ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1962273d9f13SBarry Smith   PetscFunctionReturn(0);
1963273d9f13SBarry Smith }
1964273d9f13SBarry Smith 
19654a2ae208SSatish Balay #undef __FUNCT__
19664a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1967dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1968273d9f13SBarry Smith {
1969dfbe8321SBarry Smith   PetscErrorCode ierr;
1970273d9f13SBarry Smith 
1971273d9f13SBarry Smith   PetscFunctionBegin;
1972273d9f13SBarry Smith   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1973273d9f13SBarry Smith   PetscFunctionReturn(0);
1974273d9f13SBarry Smith }
1975273d9f13SBarry Smith 
19764a2ae208SSatish Balay #undef __FUNCT__
19774a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ"
1978dfbe8321SBarry Smith PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1979f2a5309cSSatish Balay {
1980f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1981f2a5309cSSatish Balay   PetscFunctionBegin;
1982f2a5309cSSatish Balay   *array = a->a;
1983f2a5309cSSatish Balay   PetscFunctionReturn(0);
1984f2a5309cSSatish Balay }
1985f2a5309cSSatish Balay 
19864a2ae208SSatish Balay #undef __FUNCT__
19874a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1988dfbe8321SBarry Smith PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1989f2a5309cSSatish Balay {
1990f2a5309cSSatish Balay   PetscFunctionBegin;
1991f2a5309cSSatish Balay   PetscFunctionReturn(0);
1992f2a5309cSSatish Balay }
1993f2a5309cSSatish Balay 
199442ee4b1aSHong Zhang #include "petscblaslapack.h"
199542ee4b1aSHong Zhang #undef __FUNCT__
199642ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ"
1997dfbe8321SBarry Smith PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
199842ee4b1aSHong Zhang {
199942ee4b1aSHong Zhang   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2000dfbe8321SBarry Smith   PetscErrorCode ierr;
2001521d7252SBarry Smith   PetscInt       i,bs=Y->bs,j,bs2;
20024ce68768SBarry Smith   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
200342ee4b1aSHong Zhang 
200442ee4b1aSHong Zhang   PetscFunctionBegin;
200542ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
200671044d3cSBarry Smith     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2007c537a176SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2008c4319e64SHong Zhang     if (y->xtoy && y->XtoY != X) {
2009c4319e64SHong Zhang       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2010c4319e64SHong Zhang       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2011c537a176SHong Zhang     }
2012c4319e64SHong Zhang     if (!y->xtoy) { /* get xtoy */
2013c4319e64SHong Zhang       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2014c4319e64SHong Zhang       y->XtoY = X;
2015c537a176SHong Zhang     }
2016c4319e64SHong Zhang     bs2 = bs*bs;
2017c537a176SHong Zhang     for (i=0; i<x->nz; i++) {
2018c4319e64SHong Zhang       j = 0;
2019c4319e64SHong Zhang       while (j < bs2){
20206550863bSHong Zhang         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2021c4319e64SHong Zhang         j++;
2022c537a176SHong Zhang       }
2023c4319e64SHong Zhang     }
202463ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
202542ee4b1aSHong Zhang   } else {
202642ee4b1aSHong Zhang     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
202742ee4b1aSHong Zhang   }
202842ee4b1aSHong Zhang   PetscFunctionReturn(0);
202942ee4b1aSHong Zhang }
203042ee4b1aSHong Zhang 
20312593348eSBarry Smith /* -------------------------------------------------------------------*/
2032cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2033cc2dc46cSBarry Smith        MatGetRow_SeqBAIJ,
2034cc2dc46cSBarry Smith        MatRestoreRow_SeqBAIJ,
2035cc2dc46cSBarry Smith        MatMult_SeqBAIJ_N,
203697304618SKris Buschelman /* 4*/ MatMultAdd_SeqBAIJ_N,
20377c922b88SBarry Smith        MatMultTranspose_SeqBAIJ,
20387c922b88SBarry Smith        MatMultTransposeAdd_SeqBAIJ,
2039cc2dc46cSBarry Smith        MatSolve_SeqBAIJ_N,
2040cc2dc46cSBarry Smith        0,
2041cc2dc46cSBarry Smith        0,
204297304618SKris Buschelman /*10*/ 0,
2043cc2dc46cSBarry Smith        MatLUFactor_SeqBAIJ,
2044cc2dc46cSBarry Smith        0,
2045b6490206SBarry Smith        0,
2046f2501298SSatish Balay        MatTranspose_SeqBAIJ,
204797304618SKris Buschelman /*15*/ MatGetInfo_SeqBAIJ,
2048cc2dc46cSBarry Smith        MatEqual_SeqBAIJ,
2049cc2dc46cSBarry Smith        MatGetDiagonal_SeqBAIJ,
2050cc2dc46cSBarry Smith        MatDiagonalScale_SeqBAIJ,
2051cc2dc46cSBarry Smith        MatNorm_SeqBAIJ,
205297304618SKris Buschelman /*20*/ 0,
2053cc2dc46cSBarry Smith        MatAssemblyEnd_SeqBAIJ,
2054cc2dc46cSBarry Smith        0,
2055cc2dc46cSBarry Smith        MatSetOption_SeqBAIJ,
2056cc2dc46cSBarry Smith        MatZeroEntries_SeqBAIJ,
205797304618SKris Buschelman /*25*/ MatZeroRows_SeqBAIJ,
2058cc2dc46cSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,
2059cc2dc46cSBarry Smith        MatLUFactorNumeric_SeqBAIJ_N,
2060c05c3958SHong Zhang        MatCholeskyFactorSymbolic_SeqBAIJ,
2061c05c3958SHong Zhang        MatCholeskyFactorNumeric_SeqBAIJ_N,
206297304618SKris Buschelman /*30*/ MatSetUpPreallocation_SeqBAIJ,
2063cc2dc46cSBarry Smith        MatILUFactorSymbolic_SeqBAIJ,
2064c05c3958SHong Zhang        MatICCFactorSymbolic_SeqBAIJ,
2065f2a5309cSSatish Balay        MatGetArray_SeqBAIJ,
2066f2a5309cSSatish Balay        MatRestoreArray_SeqBAIJ,
206797304618SKris Buschelman /*35*/ MatDuplicate_SeqBAIJ,
2068cc2dc46cSBarry Smith        0,
2069cc2dc46cSBarry Smith        0,
2070cc2dc46cSBarry Smith        MatILUFactor_SeqBAIJ,
2071cc2dc46cSBarry Smith        0,
207297304618SKris Buschelman /*40*/ MatAXPY_SeqBAIJ,
2073cc2dc46cSBarry Smith        MatGetSubMatrices_SeqBAIJ,
2074cc2dc46cSBarry Smith        MatIncreaseOverlap_SeqBAIJ,
2075cc2dc46cSBarry Smith        MatGetValues_SeqBAIJ,
2076cc2dc46cSBarry Smith        0,
207797304618SKris Buschelman /*45*/ MatPrintHelp_SeqBAIJ,
2078cc2dc46cSBarry Smith        MatScale_SeqBAIJ,
2079cc2dc46cSBarry Smith        0,
2080cc2dc46cSBarry Smith        0,
2081cc2dc46cSBarry Smith        0,
2082521d7252SBarry Smith /*50*/ 0,
20833b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
208492c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
2085cc2dc46cSBarry Smith        0,
2086cc2dc46cSBarry Smith        0,
208797304618SKris Buschelman /*55*/ 0,
2088cc2dc46cSBarry Smith        0,
2089cc2dc46cSBarry Smith        0,
2090cc2dc46cSBarry Smith        0,
2091d3825aa8SBarry Smith        MatSetValuesBlocked_SeqBAIJ,
209297304618SKris Buschelman /*60*/ MatGetSubMatrix_SeqBAIJ,
2093b9b97703SBarry Smith        MatDestroy_SeqBAIJ,
2094b9b97703SBarry Smith        MatView_SeqBAIJ,
20953a64cc32SBarry Smith        MatGetPetscMaps_Petsc,
2096273d9f13SBarry Smith        0,
209797304618SKris Buschelman /*65*/ 0,
2098273d9f13SBarry Smith        0,
2099273d9f13SBarry Smith        0,
2100273d9f13SBarry Smith        0,
2101273d9f13SBarry Smith        0,
210297304618SKris Buschelman /*70*/ MatGetRowMax_SeqBAIJ,
210397304618SKris Buschelman        MatConvert_Basic,
2104273d9f13SBarry Smith        0,
210597304618SKris Buschelman        0,
210697304618SKris Buschelman        0,
210797304618SKris Buschelman /*75*/ 0,
210897304618SKris Buschelman        0,
210997304618SKris Buschelman        0,
211097304618SKris Buschelman        0,
211197304618SKris Buschelman        0,
211297304618SKris Buschelman /*80*/ 0,
211397304618SKris Buschelman        0,
211497304618SKris Buschelman        0,
211597304618SKris Buschelman        0,
2116865e5f61SKris Buschelman        MatLoad_SeqBAIJ,
2117865e5f61SKris Buschelman /*85*/ 0,
2118b01c7715SBarry Smith        0,
2119b01c7715SBarry Smith        0,
2120b01c7715SBarry Smith        0,
2121865e5f61SKris Buschelman        0,
2122865e5f61SKris Buschelman /*90*/ 0,
2123865e5f61SKris Buschelman        0,
2124865e5f61SKris Buschelman        0,
2125865e5f61SKris Buschelman        0,
2126865e5f61SKris Buschelman        0,
2127865e5f61SKris Buschelman /*95*/ 0,
2128865e5f61SKris Buschelman        0,
2129865e5f61SKris Buschelman        0,
2130865e5f61SKris Buschelman        0};
21312593348eSBarry Smith 
21323e90b805SBarry Smith EXTERN_C_BEGIN
21334a2ae208SSatish Balay #undef __FUNCT__
21344a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2135*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
21363e90b805SBarry Smith {
21373e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2138521d7252SBarry Smith   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2139dfbe8321SBarry Smith   PetscErrorCode ierr;
21403e90b805SBarry Smith 
21413e90b805SBarry Smith   PetscFunctionBegin;
21423e90b805SBarry Smith   if (aij->nonew != 1) {
2143634064b4SBarry Smith     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
21443e90b805SBarry Smith   }
21453e90b805SBarry Smith 
21463e90b805SBarry Smith   /* allocate space for values if not already there */
21473e90b805SBarry Smith   if (!aij->saved_values) {
214887828ca2SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
21493e90b805SBarry Smith   }
21503e90b805SBarry Smith 
21513e90b805SBarry Smith   /* copy values over */
215287828ca2SBarry Smith   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
21533e90b805SBarry Smith   PetscFunctionReturn(0);
21543e90b805SBarry Smith }
21553e90b805SBarry Smith EXTERN_C_END
21563e90b805SBarry Smith 
21573e90b805SBarry Smith EXTERN_C_BEGIN
21584a2ae208SSatish Balay #undef __FUNCT__
21594a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2160*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
21613e90b805SBarry Smith {
21623e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
21636849ba73SBarry Smith   PetscErrorCode ierr;
2164521d7252SBarry Smith   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
21653e90b805SBarry Smith 
21663e90b805SBarry Smith   PetscFunctionBegin;
21673e90b805SBarry Smith   if (aij->nonew != 1) {
2168634064b4SBarry Smith     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
21693e90b805SBarry Smith   }
21703e90b805SBarry Smith   if (!aij->saved_values) {
2171634064b4SBarry Smith     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
21723e90b805SBarry Smith   }
21733e90b805SBarry Smith 
21743e90b805SBarry Smith   /* copy values over */
217587828ca2SBarry Smith   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
21763e90b805SBarry Smith   PetscFunctionReturn(0);
21773e90b805SBarry Smith }
21783e90b805SBarry Smith EXTERN_C_END
21793e90b805SBarry Smith 
2180273d9f13SBarry Smith EXTERN_C_BEGIN
2181*be1d678aSKris Buschelman extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2182*be1d678aSKris Buschelman extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2183273d9f13SBarry Smith EXTERN_C_END
2184273d9f13SBarry Smith 
2185273d9f13SBarry Smith EXTERN_C_BEGIN
21864a2ae208SSatish Balay #undef __FUNCT__
2187a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2188*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2189a23d5eceSKris Buschelman {
2190a23d5eceSKris Buschelman   Mat_SeqBAIJ    *b;
21916849ba73SBarry Smith   PetscErrorCode ierr;
2192c1ac3661SBarry Smith   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2193a23d5eceSKris Buschelman   PetscTruth     flg;
2194a23d5eceSKris Buschelman 
2195a23d5eceSKris Buschelman   PetscFunctionBegin;
2196a23d5eceSKris Buschelman 
2197a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
2198a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2199a23d5eceSKris Buschelman   if (nnz && newbs != bs) {
2200634064b4SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2201a23d5eceSKris Buschelman   }
2202a23d5eceSKris Buschelman   bs   = newbs;
2203a23d5eceSKris Buschelman 
2204a23d5eceSKris Buschelman   mbs  = B->m/bs;
2205a23d5eceSKris Buschelman   nbs  = B->n/bs;
2206a23d5eceSKris Buschelman   bs2  = bs*bs;
2207a23d5eceSKris Buschelman 
2208a23d5eceSKris Buschelman   if (mbs*bs!=B->m || nbs*bs!=B->n) {
220977431f27SBarry Smith     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2210a23d5eceSKris Buschelman   }
2211a23d5eceSKris Buschelman 
2212a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
221377431f27SBarry Smith   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2214a23d5eceSKris Buschelman   if (nnz) {
2215a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {
221677431f27SBarry Smith       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
221777431f27SBarry Smith       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2218a23d5eceSKris Buschelman     }
2219a23d5eceSKris Buschelman   }
2220a23d5eceSKris Buschelman 
2221a23d5eceSKris Buschelman   b       = (Mat_SeqBAIJ*)B->data;
2222a23d5eceSKris Buschelman   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2223a23d5eceSKris Buschelman   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2224a23d5eceSKris Buschelman   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2225a23d5eceSKris Buschelman   if (!flg) {
2226a23d5eceSKris Buschelman     switch (bs) {
2227a23d5eceSKris Buschelman     case 1:
2228a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2229a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_1;
2230a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2231a23d5eceSKris Buschelman       break;
2232a23d5eceSKris Buschelman     case 2:
2233a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2234a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_2;
2235a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2236b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2237a23d5eceSKris Buschelman       break;
2238a23d5eceSKris Buschelman     case 3:
2239a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2240a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_3;
2241a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2242b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2243a23d5eceSKris Buschelman       break;
2244a23d5eceSKris Buschelman     case 4:
2245a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2246a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_4;
2247a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2248b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2249a23d5eceSKris Buschelman       break;
2250a23d5eceSKris Buschelman     case 5:
2251a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2252a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_5;
2253a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2254b01c7715SBarry Smith       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2255a23d5eceSKris Buschelman       break;
2256a23d5eceSKris Buschelman     case 6:
2257a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2258a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_6;
2259a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2260a23d5eceSKris Buschelman       break;
2261a23d5eceSKris Buschelman     case 7:
2262a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2263a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_7;
2264a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2265a23d5eceSKris Buschelman       break;
2266a23d5eceSKris Buschelman     default:
2267a23d5eceSKris Buschelman       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2268a23d5eceSKris Buschelman       B->ops->mult            = MatMult_SeqBAIJ_N;
2269a23d5eceSKris Buschelman       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2270a23d5eceSKris Buschelman       break;
2271a23d5eceSKris Buschelman     }
2272a23d5eceSKris Buschelman   }
2273521d7252SBarry Smith   B->bs      = bs;
2274a23d5eceSKris Buschelman   b->mbs     = mbs;
2275a23d5eceSKris Buschelman   b->nbs     = nbs;
2276c1ac3661SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2277a23d5eceSKris Buschelman   if (!nnz) {
2278a23d5eceSKris Buschelman     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2279a23d5eceSKris Buschelman     else if (nz <= 0)        nz = 1;
2280a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) b->imax[i] = nz;
2281a23d5eceSKris Buschelman     nz = nz*mbs;
2282a23d5eceSKris Buschelman   } else {
2283a23d5eceSKris Buschelman     nz = 0;
2284a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2285a23d5eceSKris Buschelman   }
2286a23d5eceSKris Buschelman 
2287a23d5eceSKris Buschelman   /* allocate the matrix space */
2288c1ac3661SBarry Smith   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2289a23d5eceSKris Buschelman   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2290a23d5eceSKris Buschelman   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2291c1ac3661SBarry Smith   b->j  = (PetscInt*)(b->a + nz*bs2);
2292c1ac3661SBarry Smith   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2293a23d5eceSKris Buschelman   b->i  = b->j + nz;
2294a23d5eceSKris Buschelman   b->singlemalloc = PETSC_TRUE;
2295a23d5eceSKris Buschelman 
2296a23d5eceSKris Buschelman   b->i[0] = 0;
2297a23d5eceSKris Buschelman   for (i=1; i<mbs+1; i++) {
2298a23d5eceSKris Buschelman     b->i[i] = b->i[i-1] + b->imax[i-1];
2299a23d5eceSKris Buschelman   }
2300a23d5eceSKris Buschelman 
2301a23d5eceSKris Buschelman   /* b->ilen will count nonzeros in each block row so far. */
2302c1ac3661SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
230352e6d16bSBarry Smith   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2304a23d5eceSKris Buschelman   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2305a23d5eceSKris Buschelman 
2306521d7252SBarry Smith   B->bs               = bs;
2307a23d5eceSKris Buschelman   b->bs2              = bs2;
2308a23d5eceSKris Buschelman   b->mbs              = mbs;
2309a23d5eceSKris Buschelman   b->nz               = 0;
2310a23d5eceSKris Buschelman   b->maxnz            = nz*bs2;
2311a23d5eceSKris Buschelman   B->info.nz_unneeded = (PetscReal)b->maxnz;
2312a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2313a23d5eceSKris Buschelman }
2314a23d5eceSKris Buschelman EXTERN_C_END
2315a23d5eceSKris Buschelman 
23160bad9183SKris Buschelman /*MC
2317fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
23180bad9183SKris Buschelman    block sparse compressed row format.
23190bad9183SKris Buschelman 
23200bad9183SKris Buschelman    Options Database Keys:
23210bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
23220bad9183SKris Buschelman 
23230bad9183SKris Buschelman   Level: beginner
23240bad9183SKris Buschelman 
23250bad9183SKris Buschelman .seealso: MatCreateSeqBAIJ
23260bad9183SKris Buschelman M*/
23270bad9183SKris Buschelman 
2328a23d5eceSKris Buschelman EXTERN_C_BEGIN
2329a23d5eceSKris Buschelman #undef __FUNCT__
23304a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ"
2331*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
23322593348eSBarry Smith {
2333dfbe8321SBarry Smith   PetscErrorCode ierr;
2334c1ac3661SBarry Smith   PetscMPIInt    size;
2335b6490206SBarry Smith   Mat_SeqBAIJ    *b;
23363b2fbd54SBarry Smith 
23373a40ed3dSBarry Smith   PetscFunctionBegin;
2338273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
233929bbc08cSBarry Smith   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2340b6490206SBarry Smith 
2341273d9f13SBarry Smith   B->m = B->M = PetscMax(B->m,B->M);
2342273d9f13SBarry Smith   B->n = B->N = PetscMax(B->n,B->N);
2343b0a32e0cSBarry Smith   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2344b0a32e0cSBarry Smith   B->data = (void*)b;
2345549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
23462593348eSBarry Smith   B->factor           = 0;
234790f02eecSBarry Smith   B->mapping          = 0;
23482593348eSBarry Smith   b->row              = 0;
23492593348eSBarry Smith   b->col              = 0;
2350e51c0b9cSSatish Balay   b->icol             = 0;
23512593348eSBarry Smith   b->reallocs         = 0;
23523e90b805SBarry Smith   b->saved_values     = 0;
2353cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2354563b5814SBarry Smith   b->setvalueslen     = 0;
2355563b5814SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2356563b5814SBarry Smith #endif
23572593348eSBarry Smith 
23583a64cc32SBarry Smith   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
23593a64cc32SBarry Smith   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2360a5ae1ecdSBarry Smith 
2361c4992f7dSBarry Smith   b->sorted           = PETSC_FALSE;
2362c4992f7dSBarry Smith   b->roworiented      = PETSC_TRUE;
23632593348eSBarry Smith   b->nonew            = 0;
23642593348eSBarry Smith   b->diag             = 0;
23652593348eSBarry Smith   b->solve_work       = 0;
2366de6a44a3SBarry Smith   b->mult_work        = 0;
23672a1b7f2aSHong Zhang   B->spptr            = 0;
23680e6d2581SBarry Smith   B->info.nz_unneeded = (PetscReal)b->maxnz;
2369883fce79SBarry Smith   b->keepzeroedrows   = PETSC_FALSE;
2370c4319e64SHong Zhang   b->xtoy              = 0;
2371c4319e64SHong Zhang   b->XtoY              = 0;
237273e7a558SHong Zhang   b->compressedrow.use     = PETSC_FALSE;
237326e093fcSHong Zhang   b->compressedrow.nrows   = 0;
237473e7a558SHong Zhang   b->compressedrow.i       = PETSC_NULL;
237573e7a558SHong Zhang   b->compressedrow.rindex  = PETSC_NULL;
237673e7a558SHong Zhang   b->compressedrow.checked = PETSC_FALSE;
237788e51ccdSHong Zhang   B->same_nonzero          = PETSC_FALSE;
23784e220ebcSLois Curfman McInnes 
2379f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
23803e90b805SBarry Smith                                      "MatStoreValues_SeqBAIJ",
2381bc4b532fSSatish Balay                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2382f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
23833e90b805SBarry Smith                                      "MatRetrieveValues_SeqBAIJ",
2384bc4b532fSSatish Balay                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2385f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
238627a8da17SBarry Smith                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2387bc4b532fSSatish Balay                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2388a6175056SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2389273d9f13SBarry Smith                                      "MatConvert_SeqBAIJ_SeqAIJ",
2390273d9f13SBarry Smith                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2391b24ad042SBarry Smith #if !defined(PETSC_USE_64BIT_INT)
2392a0e1a404SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2393a0e1a404SHong Zhang                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2394a0e1a404SHong Zhang                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2395b24ad042SBarry Smith #endif
2396a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2397a23d5eceSKris Buschelman                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2398a23d5eceSKris Buschelman                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
23993a40ed3dSBarry Smith   PetscFunctionReturn(0);
24002593348eSBarry Smith }
2401273d9f13SBarry Smith EXTERN_C_END
24022593348eSBarry Smith 
24034a2ae208SSatish Balay #undef __FUNCT__
24044a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2405dfbe8321SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
24062593348eSBarry Smith {
24072593348eSBarry Smith   Mat            C;
2408b6490206SBarry Smith   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
24096849ba73SBarry Smith   PetscErrorCode ierr;
2410c1ac3661SBarry Smith   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2411de6a44a3SBarry Smith 
24123a40ed3dSBarry Smith   PetscFunctionBegin;
241329bbc08cSBarry Smith   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
24142593348eSBarry Smith 
24152593348eSBarry Smith   *B = 0;
2416273d9f13SBarry Smith   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2417be5d1d56SKris Buschelman   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
24181d5dac46SHong Zhang   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2419273d9f13SBarry Smith   c    = (Mat_SeqBAIJ*)C->data;
242044cd7ae7SLois Curfman McInnes 
24214beb1cfeSHong Zhang   C->M   = A->M;
24224beb1cfeSHong Zhang   C->N   = A->N;
2423521d7252SBarry Smith   C->bs  = A->bs;
24249df24120SSatish Balay   c->bs2 = a->bs2;
2425b6490206SBarry Smith   c->mbs = a->mbs;
2426b6490206SBarry Smith   c->nbs = a->nbs;
24272593348eSBarry Smith 
2428c1ac3661SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2429690b6cddSBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2430b6490206SBarry Smith   for (i=0; i<mbs; i++) {
24312593348eSBarry Smith     c->imax[i] = a->imax[i];
24322593348eSBarry Smith     c->ilen[i] = a->ilen[i];
24332593348eSBarry Smith   }
24342593348eSBarry Smith 
24352593348eSBarry Smith   /* allocate the matrix space */
2436c4992f7dSBarry Smith   c->singlemalloc = PETSC_TRUE;
2437c1ac3661SBarry Smith   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2438b0a32e0cSBarry Smith   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2439690b6cddSBarry Smith   c->j = (PetscInt*)(c->a + nz*bs2);
2440de6a44a3SBarry Smith   c->i = c->j + nz;
2441c1ac3661SBarry Smith   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2442b6490206SBarry Smith   if (mbs > 0) {
2443c1ac3661SBarry Smith     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
24442e8a6d31SBarry Smith     if (cpvalues == MAT_COPY_VALUES) {
2445549d3d68SSatish Balay       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
24462e8a6d31SBarry Smith     } else {
2447549d3d68SSatish Balay       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
24482593348eSBarry Smith     }
24492593348eSBarry Smith   }
24502593348eSBarry Smith 
245152e6d16bSBarry Smith   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
24522593348eSBarry Smith   c->sorted      = a->sorted;
24532593348eSBarry Smith   c->roworiented = a->roworiented;
24542593348eSBarry Smith   c->nonew       = a->nonew;
24552593348eSBarry Smith 
24562593348eSBarry Smith   if (a->diag) {
2457c1ac3661SBarry Smith     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
245852e6d16bSBarry Smith     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2459b6490206SBarry Smith     for (i=0; i<mbs; i++) {
24602593348eSBarry Smith       c->diag[i] = a->diag[i];
24612593348eSBarry Smith     }
246298305bb5SBarry Smith   } else c->diag        = 0;
24632593348eSBarry Smith   c->nz                 = a->nz;
24642593348eSBarry Smith   c->maxnz              = a->maxnz;
24652593348eSBarry Smith   c->solve_work         = 0;
24667fc0212eSBarry Smith   c->mult_work          = 0;
2467273d9f13SBarry Smith   C->preallocated       = PETSC_TRUE;
2468273d9f13SBarry Smith   C->assembled          = PETSC_TRUE;
246988e51ccdSHong Zhang 
247088e51ccdSHong Zhang   c->compressedrow.use     = a->compressedrow.use;
247188e51ccdSHong Zhang   c->compressedrow.nrows   = a->compressedrow.nrows;
247288e51ccdSHong Zhang   c->compressedrow.checked = a->compressedrow.checked;
247388e51ccdSHong Zhang   if ( a->compressedrow.checked && a->compressedrow.use){
247488e51ccdSHong Zhang     i = a->compressedrow.nrows;
247588e51ccdSHong Zhang     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
247688e51ccdSHong Zhang     c->compressedrow.rindex = c->compressedrow.i + i + 1;
247788e51ccdSHong Zhang     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
247888e51ccdSHong Zhang     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
247988e51ccdSHong Zhang   } else {
248088e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
248188e51ccdSHong Zhang     c->compressedrow.i      = PETSC_NULL;
248288e51ccdSHong Zhang     c->compressedrow.rindex = PETSC_NULL;
248388e51ccdSHong Zhang   }
248488e51ccdSHong Zhang   C->same_nonzero = A->same_nonzero;
24852593348eSBarry Smith   *B = C;
2486b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
24873a40ed3dSBarry Smith   PetscFunctionReturn(0);
24882593348eSBarry Smith }
24892593348eSBarry Smith 
24904a2ae208SSatish Balay #undef __FUNCT__
24914a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ"
2492dfbe8321SBarry Smith PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
24932593348eSBarry Smith {
2494b6490206SBarry Smith   Mat_SeqBAIJ    *a;
24952593348eSBarry Smith   Mat            B;
24966849ba73SBarry Smith   PetscErrorCode ierr;
2497b24ad042SBarry Smith   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2498c1ac3661SBarry Smith   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2499c1ac3661SBarry Smith   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2500c1ac3661SBarry Smith   PetscInt       *masked,nmask,tmp,bs2,ishift;
2501b24ad042SBarry Smith   PetscMPIInt    size;
2502b24ad042SBarry Smith   int            fd;
250387828ca2SBarry Smith   PetscScalar    *aa;
250419bcc07fSBarry Smith   MPI_Comm       comm = ((PetscObject)viewer)->comm;
25052593348eSBarry Smith 
25063a40ed3dSBarry Smith   PetscFunctionBegin;
2507b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2508de6a44a3SBarry Smith   bs2  = bs*bs;
2509b6490206SBarry Smith 
2510d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251129bbc08cSBarry Smith   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2512b0a32e0cSBarry Smith   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
25130752156aSBarry Smith   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2514552e946dSBarry Smith   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
25152593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
25162593348eSBarry Smith 
2517d64ed03dSBarry Smith   if (header[3] < 0) {
251829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2519d64ed03dSBarry Smith   }
2520d64ed03dSBarry Smith 
252129bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
252235aab85fSBarry Smith 
252335aab85fSBarry Smith   /*
252435aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
252535aab85fSBarry Smith     divisible by the blocksize
252635aab85fSBarry Smith   */
2527b6490206SBarry Smith   mbs        = M/bs;
252835aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
252935aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
253035aab85fSBarry Smith   else                  mbs++;
253135aab85fSBarry Smith   if (extra_rows) {
253263ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
253335aab85fSBarry Smith   }
2534b6490206SBarry Smith 
25352593348eSBarry Smith   /* read in row lengths */
2536c1ac3661SBarry Smith   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
25370752156aSBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
253835aab85fSBarry Smith   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
25392593348eSBarry Smith 
2540b6490206SBarry Smith   /* read in column indices */
2541c1ac3661SBarry Smith   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
25420752156aSBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
254335aab85fSBarry Smith   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2544b6490206SBarry Smith 
2545b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2546c1ac3661SBarry Smith   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2547c1ac3661SBarry Smith   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2548c1ac3661SBarry Smith   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2549c1ac3661SBarry Smith   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
255035aab85fSBarry Smith   masked   = mask + mbs;
2551b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2552b6490206SBarry Smith   for (i=0; i<mbs; i++) {
255335aab85fSBarry Smith     nmask = 0;
2554b6490206SBarry Smith     for (j=0; j<bs; j++) {
2555b6490206SBarry Smith       kmax = rowlengths[rowcount];
2556b6490206SBarry Smith       for (k=0; k<kmax; k++) {
255735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
255835aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2559b6490206SBarry Smith       }
2560b6490206SBarry Smith       rowcount++;
2561b6490206SBarry Smith     }
256235aab85fSBarry Smith     browlengths[i] += nmask;
256335aab85fSBarry Smith     /* zero out the mask elements we set */
256435aab85fSBarry Smith     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2565b6490206SBarry Smith   }
2566b6490206SBarry Smith 
25672593348eSBarry Smith   /* create our matrix */
2568dd23797bSKris Buschelman   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
256978ae41b4SKris Buschelman   ierr = MatSetType(B,type);CHKERRQ(ierr);
257078ae41b4SKris Buschelman   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2571b6490206SBarry Smith   a = (Mat_SeqBAIJ*)B->data;
25722593348eSBarry Smith 
25732593348eSBarry Smith   /* set matrix "i" values */
2574de6a44a3SBarry Smith   a->i[0] = 0;
2575b6490206SBarry Smith   for (i=1; i<= mbs; i++) {
2576b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2577b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
25782593348eSBarry Smith   }
2579b6490206SBarry Smith   a->nz         = 0;
2580b6490206SBarry Smith   for (i=0; i<mbs; i++) a->nz += browlengths[i];
25812593348eSBarry Smith 
2582b6490206SBarry Smith   /* read in nonzero values */
258387828ca2SBarry Smith   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
25840752156aSBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
258535aab85fSBarry Smith   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2586b6490206SBarry Smith 
2587b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2588b6490206SBarry Smith   nzcount = 0; jcount = 0;
2589b6490206SBarry Smith   for (i=0; i<mbs; i++) {
2590b6490206SBarry Smith     nzcountb = nzcount;
259135aab85fSBarry Smith     nmask    = 0;
2592b6490206SBarry Smith     for (j=0; j<bs; j++) {
2593b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2594b6490206SBarry Smith       for (k=0; k<kmax; k++) {
259535aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
259635aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2597b6490206SBarry Smith       }
2598b6490206SBarry Smith     }
2599de6a44a3SBarry Smith     /* sort the masked values */
2600433994e6SBarry Smith     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2601de6a44a3SBarry Smith 
2602b6490206SBarry Smith     /* set "j" values into matrix */
2603b6490206SBarry Smith     maskcount = 1;
260435aab85fSBarry Smith     for (j=0; j<nmask; j++) {
260535aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2606de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2607b6490206SBarry Smith     }
2608b6490206SBarry Smith     /* set "a" values into matrix */
2609de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2610b6490206SBarry Smith     for (j=0; j<bs; j++) {
2611b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2612b6490206SBarry Smith       for (k=0; k<kmax; k++) {
2613de6a44a3SBarry Smith         tmp       = jj[nzcountb]/bs ;
2614de6a44a3SBarry Smith         block     = mask[tmp] - 1;
2615de6a44a3SBarry Smith         point     = jj[nzcountb] - bs*tmp;
2616de6a44a3SBarry Smith         idx       = ishift + bs2*block + j + bs*point;
2617375fe846SBarry Smith         a->a[idx] = (MatScalar)aa[nzcountb++];
2618b6490206SBarry Smith       }
2619b6490206SBarry Smith     }
262035aab85fSBarry Smith     /* zero out the mask elements we set */
262135aab85fSBarry Smith     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2622b6490206SBarry Smith   }
262329bbc08cSBarry Smith   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2624b6490206SBarry Smith 
2625606d414cSSatish Balay   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2626606d414cSSatish Balay   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2627606d414cSSatish Balay   ierr = PetscFree(aa);CHKERRQ(ierr);
2628606d414cSSatish Balay   ierr = PetscFree(jj);CHKERRQ(ierr);
2629606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
2630b6490206SBarry Smith 
263178ae41b4SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
263278ae41b4SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26339c01be13SBarry Smith   ierr = MatView_Private(B);CHKERRQ(ierr);
263478ae41b4SKris Buschelman 
263578ae41b4SKris Buschelman   *A = B;
26363a40ed3dSBarry Smith   PetscFunctionReturn(0);
26372593348eSBarry Smith }
26382593348eSBarry Smith 
26394a2ae208SSatish Balay #undef __FUNCT__
26404a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ"
2641273d9f13SBarry Smith /*@C
2642273d9f13SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2643273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
2644273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
2645273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
2646273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
26472593348eSBarry Smith 
2648273d9f13SBarry Smith    Collective on MPI_Comm
2649273d9f13SBarry Smith 
2650273d9f13SBarry Smith    Input Parameters:
2651273d9f13SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
2652273d9f13SBarry Smith .  bs - size of block
2653273d9f13SBarry Smith .  m - number of rows
2654273d9f13SBarry Smith .  n - number of columns
265535d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
265635d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
2657273d9f13SBarry Smith          (possibly different for each block row) or PETSC_NULL
2658273d9f13SBarry Smith 
2659273d9f13SBarry Smith    Output Parameter:
2660273d9f13SBarry Smith .  A - the matrix
2661273d9f13SBarry Smith 
2662273d9f13SBarry Smith    Options Database Keys:
2663273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2664273d9f13SBarry Smith                      block calculations (much slower)
2665273d9f13SBarry Smith .    -mat_block_size - size of the blocks to use
2666273d9f13SBarry Smith 
2667273d9f13SBarry Smith    Level: intermediate
2668273d9f13SBarry Smith 
2669273d9f13SBarry Smith    Notes:
2670d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
2671d1be2dadSMatthew Knepley 
267249a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
267349a6f317SBarry Smith 
267435d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
267535d8aa7fSBarry Smith 
2676273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
2677273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
2678273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
2679273d9f13SBarry Smith 
2680273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
2681273d9f13SBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2682273d9f13SBarry Smith    allocation.  For additional details, see the users manual chapter on
2683273d9f13SBarry Smith    matrices.
2684273d9f13SBarry Smith 
2685273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2686273d9f13SBarry Smith @*/
2687*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2688273d9f13SBarry Smith {
2689dfbe8321SBarry Smith   PetscErrorCode ierr;
2690273d9f13SBarry Smith 
2691273d9f13SBarry Smith   PetscFunctionBegin;
2692273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2693273d9f13SBarry Smith   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2694273d9f13SBarry Smith   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2695273d9f13SBarry Smith   PetscFunctionReturn(0);
2696273d9f13SBarry Smith }
2697273d9f13SBarry Smith 
26984a2ae208SSatish Balay #undef __FUNCT__
26994a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2700273d9f13SBarry Smith /*@C
2701273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2702273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
2703273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
2704273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
2705273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
2706273d9f13SBarry Smith 
2707273d9f13SBarry Smith    Collective on MPI_Comm
2708273d9f13SBarry Smith 
2709273d9f13SBarry Smith    Input Parameters:
2710273d9f13SBarry Smith +  A - the matrix
2711273d9f13SBarry Smith .  bs - size of block
2712273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
2713273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
2714273d9f13SBarry Smith          (possibly different for each block row) or PETSC_NULL
2715273d9f13SBarry Smith 
2716273d9f13SBarry Smith    Options Database Keys:
2717273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2718273d9f13SBarry Smith                      block calculations (much slower)
2719273d9f13SBarry Smith .    -mat_block_size - size of the blocks to use
2720273d9f13SBarry Smith 
2721273d9f13SBarry Smith    Level: intermediate
2722273d9f13SBarry Smith 
2723273d9f13SBarry Smith    Notes:
272449a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
272549a6f317SBarry Smith 
2726273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
2727273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
2728273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
2729273d9f13SBarry Smith 
2730273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
2731273d9f13SBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2732273d9f13SBarry Smith    allocation.  For additional details, see the users manual chapter on
2733273d9f13SBarry Smith    matrices.
2734273d9f13SBarry Smith 
2735273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2736273d9f13SBarry Smith @*/
2737*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2738273d9f13SBarry Smith {
2739c1ac3661SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2740273d9f13SBarry Smith 
2741273d9f13SBarry Smith   PetscFunctionBegin;
2742a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2743a23d5eceSKris Buschelman   if (f) {
2744a23d5eceSKris Buschelman     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2745273d9f13SBarry Smith   }
2746273d9f13SBarry Smith   PetscFunctionReturn(0);
2747273d9f13SBarry Smith }
2748a1d92eedSBarry Smith 
2749