xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 int MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
16   int         *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr;
17   PetscScalar *v = a->a,*odiag,*diag,*mdiag;
18 
19   PetscFunctionBegin;
20   if (a->idiagvalid) PetscFunctionReturn(0);
21   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
22   diag_offset = a->diag;
23   if (!a->idiag) {
24     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
25   }
26   diag  = a->idiag;
27   mdiag = a->idiag+bs*bs*mbs;
28   /* factor and invert each block */
29   switch (a->bs){
30     case 2:
31       for (i=0; i<mbs; i++) {
32         odiag   = v + 4*diag_offset[i];
33         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
34 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
35 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
36 	diag    += 4;
37 	mdiag   += 4;
38       }
39       break;
40     case 3:
41       for (i=0; i<mbs; i++) {
42         odiag    = v + 9*diag_offset[i];
43         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
44         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
45         diag[8]  = odiag[8];
46         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
47         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
48         mdiag[8] = odiag[8];
49 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
50 	diag    += 9;
51 	mdiag   += 9;
52       }
53       break;
54     case 4:
55       for (i=0; i<mbs; i++) {
56         odiag  = v + 16*diag_offset[i];
57         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
58         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
60 	diag  += 16;
61 	mdiag += 16;
62       }
63       break;
64     case 5:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 25*diag_offset[i];
67         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
68         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
70 	diag  += 25;
71 	mdiag += 25;
72       }
73       break;
74     default:
75       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs);
76   }
77   a->idiagvalid = PETSC_TRUE;
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
83 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
84 {
85   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
86   PetscScalar        *x,x1,x2,s1,s2;
87   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
88   int                ierr,m = a->mbs,i,i2,nz,idx;
89   const int          *diag,*ai = a->i,*aj = a->j,*vi;
90 
91   PetscFunctionBegin;
92   its = its*lits;
93   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
94   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
95   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
96   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
97   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
98 
99   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
100 
101   diag  = a->diag;
102   idiag = a->idiag;
103   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
104   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
105 
106   if (flag & SOR_ZERO_INITIAL_GUESS) {
107     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
108       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
109       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
110       i2     = 2;
111       idiag += 4;
112       for (i=1; i<m; i++) {
113 	v     = aa + 4*ai[i];
114 	vi    = aj + ai[i];
115 	nz    = diag[i] - ai[i];
116 	s1    = b[i2]; s2 = b[i2+1];
117 	while (nz--) {
118 	  idx  = 2*(*vi++);
119 	  x1   = x[idx]; x2 = x[1+idx];
120 	  s1  -= v[0]*x1 + v[2]*x2;
121 	  s2  -= v[1]*x1 + v[3]*x2;
122 	  v   += 4;
123 	}
124 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
125 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
126         idiag   += 4;
127         i2      += 2;
128       }
129       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
130       PetscLogFlops(4*(a->nz));
131     }
132     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
133         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
134       i2    = 0;
135       mdiag = a->idiag+4*a->mbs;
136       for (i=0; i<m; i++) {
137         x1      = x[i2]; x2 = x[i2+1];
138         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
139         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
140         mdiag  += 4;
141         i2     += 2;
142       }
143       PetscLogFlops(6*m);
144     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
145       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
146     }
147     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
148       idiag   = a->idiag+4*a->mbs - 4;
149       i2      = 2*m - 2;
150       x1      = x[i2]; x2 = x[i2+1];
151       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
152       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
153       idiag -= 4;
154       i2    -= 2;
155       for (i=m-2; i>=0; i--) {
156 	v     = aa + 4*(diag[i]+1);
157 	vi    = aj + diag[i] + 1;
158 	nz    = ai[i+1] - diag[i] - 1;
159 	s1    = x[i2]; s2 = x[i2+1];
160 	while (nz--) {
161 	  idx  = 2*(*vi++);
162 	  x1   = x[idx]; x2 = x[1+idx];
163 	  s1  -= v[0]*x1 + v[2]*x2;
164 	  s2  -= v[1]*x1 + v[3]*x2;
165 	  v   += 4;
166 	}
167 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
168 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
169         idiag   -= 4;
170         i2      -= 2;
171       }
172       PetscLogFlops(4*(a->nz));
173     }
174   } else {
175     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
176   }
177   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
178   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
184 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
185 {
186   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
187   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
188   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
189   int                ierr,m = a->mbs,i,i2,nz,idx;
190   const int          *diag,*ai = a->i,*aj = a->j,*vi;
191 
192   PetscFunctionBegin;
193   its = its*lits;
194   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
195   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
196   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
197   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
198   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
199 
200   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
201 
202   diag  = a->diag;
203   idiag = a->idiag;
204   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
205   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
206 
207   if (flag & SOR_ZERO_INITIAL_GUESS) {
208     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
209       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
210       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
211       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
212       i2     = 3;
213       idiag += 9;
214       for (i=1; i<m; i++) {
215 	v     = aa + 9*ai[i];
216 	vi    = aj + ai[i];
217 	nz    = diag[i] - ai[i];
218 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
219 	while (nz--) {
220 	  idx  = 3*(*vi++);
221 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
222 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
223 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
224 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
225 	  v   += 9;
226 	}
227 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
228 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
229 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
230         idiag   += 9;
231         i2      += 3;
232       }
233       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
234       PetscLogFlops(9*(a->nz));
235     }
236     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
237         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
238       i2    = 0;
239       mdiag = a->idiag+9*a->mbs;
240       for (i=0; i<m; i++) {
241         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
242         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
243         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
244         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
245         mdiag  += 9;
246         i2     += 3;
247       }
248       PetscLogFlops(15*m);
249     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
250       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
251     }
252     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
253       idiag   = a->idiag+9*a->mbs - 9;
254       i2      = 3*m - 3;
255       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
256       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
257       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
258       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
259       idiag -= 9;
260       i2    -= 3;
261       for (i=m-2; i>=0; i--) {
262 	v     = aa + 9*(diag[i]+1);
263 	vi    = aj + diag[i] + 1;
264 	nz    = ai[i+1] - diag[i] - 1;
265 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
266 	while (nz--) {
267 	  idx  = 3*(*vi++);
268 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
269 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
270 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
271 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
272 	  v   += 9;
273 	}
274 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
275 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
276 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
277         idiag   -= 9;
278         i2      -= 3;
279       }
280       PetscLogFlops(9*(a->nz));
281     }
282   } else {
283     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
284   }
285   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
286   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
292 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
293 {
294   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
295   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
296   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
297   int                ierr,m = a->mbs,i,i2,nz,idx;
298   const int          *diag,*ai = a->i,*aj = a->j,*vi;
299 
300   PetscFunctionBegin;
301   its = its*lits;
302   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
303   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
304   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
305   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
306   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
307 
308   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
309 
310   diag  = a->diag;
311   idiag = a->idiag;
312   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
313   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
314 
315   if (flag & SOR_ZERO_INITIAL_GUESS) {
316     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
317       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
318       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
319       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
320       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
321       i2     = 4;
322       idiag += 16;
323       for (i=1; i<m; i++) {
324 	v     = aa + 16*ai[i];
325 	vi    = aj + ai[i];
326 	nz    = diag[i] - ai[i];
327 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
328 	while (nz--) {
329 	  idx  = 4*(*vi++);
330 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
331 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
332 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
333 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
334 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
335 	  v   += 16;
336 	}
337 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
338 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
339 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
340 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
341         idiag   += 16;
342         i2      += 4;
343       }
344       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
345       PetscLogFlops(16*(a->nz));
346     }
347     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
348         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
349       i2    = 0;
350       mdiag = a->idiag+16*a->mbs;
351       for (i=0; i<m; i++) {
352         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
353         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
354         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
355         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
356         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
357         mdiag  += 16;
358         i2     += 4;
359       }
360       PetscLogFlops(28*m);
361     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
362       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
363     }
364     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
365       idiag   = a->idiag+16*a->mbs - 16;
366       i2      = 4*m - 4;
367       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
368       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
369       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
370       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
371       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
372       idiag -= 16;
373       i2    -= 4;
374       for (i=m-2; i>=0; i--) {
375 	v     = aa + 16*(diag[i]+1);
376 	vi    = aj + diag[i] + 1;
377 	nz    = ai[i+1] - diag[i] - 1;
378 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
379 	while (nz--) {
380 	  idx  = 4*(*vi++);
381 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
382 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
383 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
384 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
385 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
386 	  v   += 16;
387 	}
388 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
389 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
390 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
391 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
392         idiag   -= 16;
393         i2      -= 4;
394       }
395       PetscLogFlops(16*(a->nz));
396     }
397   } else {
398     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
399   }
400   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
401   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
407 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
408 {
409   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
410   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
411   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
412   int                ierr,m = a->mbs,i,i2,nz,idx;
413   const int          *diag,*ai = a->i,*aj = a->j,*vi;
414 
415   PetscFunctionBegin;
416   its = its*lits;
417   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
418   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
419   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
420   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
421   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
422 
423   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
424 
425   diag  = a->diag;
426   idiag = a->idiag;
427   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
428   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
429 
430   if (flag & SOR_ZERO_INITIAL_GUESS) {
431     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
432       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
433       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
434       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
435       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
436       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
437       i2     = 5;
438       idiag += 25;
439       for (i=1; i<m; i++) {
440 	v     = aa + 25*ai[i];
441 	vi    = aj + ai[i];
442 	nz    = diag[i] - ai[i];
443 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
444 	while (nz--) {
445 	  idx  = 5*(*vi++);
446 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
447 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
448 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
449 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
450 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
451 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
452 	  v   += 25;
453 	}
454 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
455 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
456 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
457 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
458 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
459         idiag   += 25;
460         i2      += 5;
461       }
462       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
463       PetscLogFlops(25*(a->nz));
464     }
465     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
466         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
467       i2    = 0;
468       mdiag = a->idiag+25*a->mbs;
469       for (i=0; i<m; i++) {
470         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
471         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
472         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
473         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
474         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
475         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
476         mdiag  += 25;
477         i2     += 5;
478       }
479       PetscLogFlops(45*m);
480     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
481       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
482     }
483     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
484       idiag   = a->idiag+25*a->mbs - 25;
485       i2      = 5*m - 5;
486       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
487       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
488       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
489       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
490       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
491       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
492       idiag -= 25;
493       i2    -= 5;
494       for (i=m-2; i>=0; i--) {
495 	v     = aa + 25*(diag[i]+1);
496 	vi    = aj + diag[i] + 1;
497 	nz    = ai[i+1] - diag[i] - 1;
498 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
499 	while (nz--) {
500 	  idx  = 5*(*vi++);
501 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
502 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
503 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
504 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
505 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
506 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
507 	  v   += 25;
508 	}
509 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
510 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
511 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
512 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
513 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
514         idiag   -= 25;
515         i2      -= 5;
516       }
517       PetscLogFlops(25*(a->nz));
518     }
519   } else {
520     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
521   }
522   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
523   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 /*
528     Special version for Fun3d sequential benchmark
529 */
530 #if defined(PETSC_HAVE_FORTRAN_CAPS)
531 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
532 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
533 #define matsetvaluesblocked4_ matsetvaluesblocked4
534 #endif
535 
536 EXTERN_C_BEGIN
537 #undef __FUNCT__
538 #define __FUNCT__ "matsetvaluesblocked4_"
539 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
540 {
541   Mat               A = *AA;
542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
543   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
544   int               *ai=a->i,*ailen=a->ilen;
545   int               *aj=a->j,stepval;
546   const PetscScalar *value = v;
547   MatScalar         *ap,*aa = a->a,*bap;
548 
549   PetscFunctionBegin;
550   stepval = (n-1)*4;
551   for (k=0; k<m; k++) { /* loop over added rows */
552     row  = im[k];
553     rp   = aj + ai[row];
554     ap   = aa + 16*ai[row];
555     nrow = ailen[row];
556     low  = 0;
557     for (l=0; l<n; l++) { /* loop over added columns */
558       col = in[l];
559       value = v + k*(stepval+4)*4 + l*4;
560       low = 0; high = nrow;
561       while (high-low > 7) {
562         t = (low+high)/2;
563         if (rp[t] > col) high = t;
564         else             low  = t;
565       }
566       for (i=low; i<high; i++) {
567         if (rp[i] > col) break;
568         if (rp[i] == col) {
569           bap  = ap +  16*i;
570           for (ii=0; ii<4; ii++,value+=stepval) {
571             for (jj=ii; jj<16; jj+=4) {
572               bap[jj] += *value++;
573             }
574           }
575           goto noinsert2;
576         }
577       }
578       N = nrow++ - 1;
579       /* shift up all the later entries in this row */
580       for (ii=N; ii>=i; ii--) {
581         rp[ii+1] = rp[ii];
582         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
583       }
584       if (N >= i) {
585         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
586       }
587       rp[i] = col;
588       bap   = ap +  16*i;
589       for (ii=0; ii<4; ii++,value+=stepval) {
590         for (jj=ii; jj<16; jj+=4) {
591           bap[jj] = *value++;
592         }
593       }
594       noinsert2:;
595       low = i;
596     }
597     ailen[row] = nrow;
598   }
599 }
600 EXTERN_C_END
601 
602 #if defined(PETSC_HAVE_FORTRAN_CAPS)
603 #define matsetvalues4_ MATSETVALUES4
604 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
605 #define matsetvalues4_ matsetvalues4
606 #endif
607 
608 EXTERN_C_BEGIN
609 #undef __FUNCT__
610 #define __FUNCT__ "MatSetValues4_"
611 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
612 {
613   Mat         A = *AA;
614   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
615   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
616   int         *ai=a->i,*ailen=a->ilen;
617   int         *aj=a->j,brow,bcol;
618   int         ridx,cidx;
619   MatScalar   *ap,value,*aa=a->a,*bap;
620 
621   PetscFunctionBegin;
622   for (k=0; k<m; k++) { /* loop over added rows */
623     row  = im[k]; brow = row/4;
624     rp   = aj + ai[brow];
625     ap   = aa + 16*ai[brow];
626     nrow = ailen[brow];
627     low  = 0;
628     for (l=0; l<n; l++) { /* loop over added columns */
629       col = in[l]; bcol = col/4;
630       ridx = row % 4; cidx = col % 4;
631       value = v[l + k*n];
632       low = 0; high = nrow;
633       while (high-low > 7) {
634         t = (low+high)/2;
635         if (rp[t] > bcol) high = t;
636         else              low  = t;
637       }
638       for (i=low; i<high; i++) {
639         if (rp[i] > bcol) break;
640         if (rp[i] == bcol) {
641           bap  = ap +  16*i + 4*cidx + ridx;
642           *bap += value;
643           goto noinsert1;
644         }
645       }
646       N = nrow++ - 1;
647       /* shift up all the later entries in this row */
648       for (ii=N; ii>=i; ii--) {
649         rp[ii+1] = rp[ii];
650         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
651       }
652       if (N>=i) {
653         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
654       }
655       rp[i]                    = bcol;
656       ap[16*i + 4*cidx + ridx] = value;
657       noinsert1:;
658       low = i;
659     }
660     ailen[brow] = nrow;
661   }
662 }
663 EXTERN_C_END
664 
665 /*  UGLY, ugly, ugly
666    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
667    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
668    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
669    converts the entries into single precision and then calls ..._MatScalar() to put them
670    into the single precision data structures.
671 */
672 #if defined(PETSC_USE_MAT_SINGLE)
673 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
674 #else
675 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
676 #endif
677 
678 #define CHUNKSIZE  10
679 
680 /*
681      Checks for missing diagonals
682 */
683 #undef __FUNCT__
684 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
685 int MatMissingDiagonal_SeqBAIJ(Mat A)
686 {
687   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
688   int         *diag,*jj = a->j,i,ierr;
689 
690   PetscFunctionBegin;
691   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
692   diag = a->diag;
693   for (i=0; i<a->mbs; i++) {
694     if (jj[diag[i]] != i) {
695       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i);
696     }
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
703 int MatMarkDiagonal_SeqBAIJ(Mat A)
704 {
705   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
706   int         i,j,*diag,m = a->mbs,ierr;
707 
708   PetscFunctionBegin;
709   if (a->diag) PetscFunctionReturn(0);
710 
711   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
712   PetscLogObjectMemory(A,(m+1)*sizeof(int));
713   for (i=0; i<m; i++) {
714     diag[i] = a->i[i+1];
715     for (j=a->i[i]; j<a->i[i+1]; j++) {
716       if (a->j[j] == i) {
717         diag[i] = j;
718         break;
719       }
720     }
721   }
722   a->diag = diag;
723   PetscFunctionReturn(0);
724 }
725 
726 
727 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
731 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
732 {
733   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
734   int         ierr,n = a->mbs,i;
735 
736   PetscFunctionBegin;
737   *nn = n;
738   if (!ia) PetscFunctionReturn(0);
739   if (symmetric) {
740     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
741   } else if (oshift == 1) {
742     /* temporarily add 1 to i and j indices */
743     int nz = a->i[n];
744     for (i=0; i<nz; i++) a->j[i]++;
745     for (i=0; i<n+1; i++) a->i[i]++;
746     *ia = a->i; *ja = a->j;
747   } else {
748     *ia = a->i; *ja = a->j;
749   }
750 
751   PetscFunctionReturn(0);
752 }
753 
754 #undef __FUNCT__
755 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
756 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
757 {
758   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
759   int         i,n = a->mbs,ierr;
760 
761   PetscFunctionBegin;
762   if (!ia) PetscFunctionReturn(0);
763   if (symmetric) {
764     ierr = PetscFree(*ia);CHKERRQ(ierr);
765     ierr = PetscFree(*ja);CHKERRQ(ierr);
766   } else if (oshift == 1) {
767     int nz = a->i[n]-1;
768     for (i=0; i<nz; i++) a->j[i]--;
769     for (i=0; i<n+1; i++) a->i[i]--;
770   }
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
776 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
777 {
778   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
779 
780   PetscFunctionBegin;
781   *bs = baij->bs;
782   PetscFunctionReturn(0);
783 }
784 
785 
786 #undef __FUNCT__
787 #define __FUNCT__ "MatDestroy_SeqBAIJ"
788 int MatDestroy_SeqBAIJ(Mat A)
789 {
790   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
791   int         ierr;
792 
793   PetscFunctionBegin;
794 #if defined(PETSC_USE_LOG)
795   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
796 #endif
797   ierr = PetscFree(a->a);CHKERRQ(ierr);
798   if (!a->singlemalloc) {
799     ierr = PetscFree(a->i);CHKERRQ(ierr);
800     ierr = PetscFree(a->j);CHKERRQ(ierr);
801   }
802   if (a->row) {
803     ierr = ISDestroy(a->row);CHKERRQ(ierr);
804   }
805   if (a->col) {
806     ierr = ISDestroy(a->col);CHKERRQ(ierr);
807   }
808   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
809   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
810   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
811   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
812   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
813   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
814   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
815   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
816 #if defined(PETSC_USE_MAT_SINGLE)
817   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
818 #endif
819   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
820 
821   ierr = PetscFree(a);CHKERRQ(ierr);
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatSetOption_SeqBAIJ"
827 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
828 {
829   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
830 
831   PetscFunctionBegin;
832   switch (op) {
833   case MAT_ROW_ORIENTED:
834     a->roworiented    = PETSC_TRUE;
835     break;
836   case MAT_COLUMN_ORIENTED:
837     a->roworiented    = PETSC_FALSE;
838     break;
839   case MAT_COLUMNS_SORTED:
840     a->sorted         = PETSC_TRUE;
841     break;
842   case MAT_COLUMNS_UNSORTED:
843     a->sorted         = PETSC_FALSE;
844     break;
845   case MAT_KEEP_ZEROED_ROWS:
846     a->keepzeroedrows = PETSC_TRUE;
847     break;
848   case MAT_NO_NEW_NONZERO_LOCATIONS:
849     a->nonew          = 1;
850     break;
851   case MAT_NEW_NONZERO_LOCATION_ERR:
852     a->nonew          = -1;
853     break;
854   case MAT_NEW_NONZERO_ALLOCATION_ERR:
855     a->nonew          = -2;
856     break;
857   case MAT_YES_NEW_NONZERO_LOCATIONS:
858     a->nonew          = 0;
859     break;
860   case MAT_ROWS_SORTED:
861   case MAT_ROWS_UNSORTED:
862   case MAT_YES_NEW_DIAGONALS:
863   case MAT_IGNORE_OFF_PROC_ENTRIES:
864   case MAT_USE_HASH_TABLE:
865     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
866     break;
867   case MAT_NO_NEW_DIAGONALS:
868     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
869   case MAT_SYMMETRIC:
870   case MAT_STRUCTURALLY_SYMMETRIC:
871   case MAT_NOT_SYMMETRIC:
872   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
873   case MAT_HERMITIAN:
874   case MAT_NOT_HERMITIAN:
875   case MAT_SYMMETRY_ETERNAL:
876   case MAT_NOT_SYMMETRY_ETERNAL:
877     break;
878   default:
879     SETERRQ(PETSC_ERR_SUP,"unknown option");
880   }
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatGetRow_SeqBAIJ"
886 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
887 {
888   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
889   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
890   MatScalar    *aa,*aa_i;
891   PetscScalar  *v_i;
892 
893   PetscFunctionBegin;
894   bs  = a->bs;
895   ai  = a->i;
896   aj  = a->j;
897   aa  = a->a;
898   bs2 = a->bs2;
899 
900   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
901 
902   bn  = row/bs;   /* Block number */
903   bp  = row % bs; /* Block Position */
904   M   = ai[bn+1] - ai[bn];
905   *nz = bs*M;
906 
907   if (v) {
908     *v = 0;
909     if (*nz) {
910       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
911       for (i=0; i<M; i++) { /* for each block in the block row */
912         v_i  = *v + i*bs;
913         aa_i = aa + bs2*(ai[bn] + i);
914         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
915       }
916     }
917   }
918 
919   if (idx) {
920     *idx = 0;
921     if (*nz) {
922       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
923       for (i=0; i<M; i++) { /* for each block in the block row */
924         idx_i = *idx + i*bs;
925         itmp  = bs*aj[ai[bn] + i];
926         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
927       }
928     }
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
935 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
936 {
937   int ierr;
938 
939   PetscFunctionBegin;
940   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
941   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_SeqBAIJ"
947 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
948 {
949   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
950   Mat         C;
951   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
952   int         *rows,*cols,bs2=a->bs2;
953   PetscScalar *array;
954 
955   PetscFunctionBegin;
956   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
957   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
958   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
959 
960 #if defined(PETSC_USE_MAT_SINGLE)
961   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
962   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
963 #else
964   array = a->a;
965 #endif
966 
967   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
968   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
969   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
970   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
971   ierr = PetscFree(col);CHKERRQ(ierr);
972   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
973   cols = rows + bs;
974   for (i=0; i<mbs; i++) {
975     cols[0] = i*bs;
976     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
977     len = ai[i+1] - ai[i];
978     for (j=0; j<len; j++) {
979       rows[0] = (*aj++)*bs;
980       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
981       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
982       array += bs2;
983     }
984   }
985   ierr = PetscFree(rows);CHKERRQ(ierr);
986 #if defined(PETSC_USE_MAT_SINGLE)
987   ierr = PetscFree(array);
988 #endif
989 
990   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
992 
993   if (B) {
994     *B = C;
995   } else {
996     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1003 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1004 {
1005   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1006   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1007   PetscScalar *aa;
1008   FILE        *file;
1009 
1010   PetscFunctionBegin;
1011   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1012   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
1013   col_lens[0] = MAT_FILE_COOKIE;
1014 
1015   col_lens[1] = A->m;
1016   col_lens[2] = A->n;
1017   col_lens[3] = a->nz*bs2;
1018 
1019   /* store lengths of each row and write (including header) to file */
1020   count = 0;
1021   for (i=0; i<a->mbs; i++) {
1022     for (j=0; j<bs; j++) {
1023       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1024     }
1025   }
1026   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
1027   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1028 
1029   /* store column indices (zero start index) */
1030   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
1031   count = 0;
1032   for (i=0; i<a->mbs; i++) {
1033     for (j=0; j<bs; j++) {
1034       for (k=a->i[i]; k<a->i[i+1]; k++) {
1035         for (l=0; l<bs; l++) {
1036           jj[count++] = bs*a->j[k] + l;
1037         }
1038       }
1039     }
1040   }
1041   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
1042   ierr = PetscFree(jj);CHKERRQ(ierr);
1043 
1044   /* store nonzero values */
1045   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1046   count = 0;
1047   for (i=0; i<a->mbs; i++) {
1048     for (j=0; j<bs; j++) {
1049       for (k=a->i[i]; k<a->i[i+1]; k++) {
1050         for (l=0; l<bs; l++) {
1051           aa[count++] = a->a[bs2*k + l*bs + j];
1052         }
1053       }
1054     }
1055   }
1056   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
1057   ierr = PetscFree(aa);CHKERRQ(ierr);
1058 
1059   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1060   if (file) {
1061     fprintf(file,"-matload_block_size %d\n",a->bs);
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1068 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1069 {
1070   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1071   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
1072   PetscViewerFormat format;
1073 
1074   PetscFunctionBegin;
1075   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1076   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1077     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1078   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1079     Mat aij;
1080     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1081     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1082     ierr = MatDestroy(aij);CHKERRQ(ierr);
1083   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1084      PetscFunctionReturn(0);
1085   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1086     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1087     for (i=0; i<a->mbs; i++) {
1088       for (j=0; j<bs; j++) {
1089         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1090         for (k=a->i[i]; k<a->i[i+1]; k++) {
1091           for (l=0; l<bs; l++) {
1092 #if defined(PETSC_USE_COMPLEX)
1093             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1094               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1095                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1096             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1097               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1098                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1099             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1100               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1101             }
1102 #else
1103             if (a->a[bs2*k + l*bs + j] != 0.0) {
1104               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1105             }
1106 #endif
1107           }
1108         }
1109         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1110       }
1111     }
1112     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1113   } else {
1114     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1115     for (i=0; i<a->mbs; i++) {
1116       for (j=0; j<bs; j++) {
1117         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1118         for (k=a->i[i]; k<a->i[i+1]; k++) {
1119           for (l=0; l<bs; l++) {
1120 #if defined(PETSC_USE_COMPLEX)
1121             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1122               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1123                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1124             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1125               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1126                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1127             } else {
1128               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1129             }
1130 #else
1131             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1132 #endif
1133           }
1134         }
1135         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1136       }
1137     }
1138     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1139   }
1140   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1146 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1147 {
1148   Mat          A = (Mat) Aa;
1149   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
1150   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1151   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1152   MatScalar    *aa;
1153   PetscViewer  viewer;
1154 
1155   PetscFunctionBegin;
1156 
1157   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1158   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1159 
1160   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1161 
1162   /* loop over matrix elements drawing boxes */
1163   color = PETSC_DRAW_BLUE;
1164   for (i=0,row=0; i<mbs; i++,row+=bs) {
1165     for (j=a->i[i]; j<a->i[i+1]; j++) {
1166       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1167       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1168       aa = a->a + j*bs2;
1169       for (k=0; k<bs; k++) {
1170         for (l=0; l<bs; l++) {
1171           if (PetscRealPart(*aa++) >=  0.) continue;
1172           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1173         }
1174       }
1175     }
1176   }
1177   color = PETSC_DRAW_CYAN;
1178   for (i=0,row=0; i<mbs; i++,row+=bs) {
1179     for (j=a->i[i]; j<a->i[i+1]; j++) {
1180       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1181       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1182       aa = a->a + j*bs2;
1183       for (k=0; k<bs; k++) {
1184         for (l=0; l<bs; l++) {
1185           if (PetscRealPart(*aa++) != 0.) continue;
1186           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1187         }
1188       }
1189     }
1190   }
1191 
1192   color = PETSC_DRAW_RED;
1193   for (i=0,row=0; i<mbs; i++,row+=bs) {
1194     for (j=a->i[i]; j<a->i[i+1]; j++) {
1195       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1196       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1197       aa = a->a + j*bs2;
1198       for (k=0; k<bs; k++) {
1199         for (l=0; l<bs; l++) {
1200           if (PetscRealPart(*aa++) <= 0.) continue;
1201           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1202         }
1203       }
1204     }
1205   }
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1211 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1212 {
1213   int          ierr;
1214   PetscReal    xl,yl,xr,yr,w,h;
1215   PetscDraw    draw;
1216   PetscTruth   isnull;
1217 
1218   PetscFunctionBegin;
1219 
1220   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1221   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1222 
1223   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1224   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1225   xr += w;    yr += h;  xl = -w;     yl = -h;
1226   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1227   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1228   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatView_SeqBAIJ"
1234 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1235 {
1236   int        ierr;
1237   PetscTruth iascii,isbinary,isdraw;
1238 
1239   PetscFunctionBegin;
1240   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1241   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1242   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1243   if (iascii){
1244     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1245   } else if (isbinary) {
1246     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1247   } else if (isdraw) {
1248     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1249   } else {
1250     Mat B;
1251     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1252     ierr = MatView(B,viewer);CHKERRQ(ierr);
1253     ierr = MatDestroy(B);CHKERRQ(ierr);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1261 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
1262 {
1263   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1264   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1265   int        *ai = a->i,*ailen = a->ilen;
1266   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1267   MatScalar  *ap,*aa = a->a,zero = 0.0;
1268 
1269   PetscFunctionBegin;
1270   for (k=0; k<m; k++) { /* loop over rows */
1271     row  = im[k]; brow = row/bs;
1272     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1273     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1274     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1275     nrow = ailen[brow];
1276     for (l=0; l<n; l++) { /* loop over columns */
1277       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1278       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1279       col  = in[l] ;
1280       bcol = col/bs;
1281       cidx = col%bs;
1282       ridx = row%bs;
1283       high = nrow;
1284       low  = 0; /* assume unsorted */
1285       while (high-low > 5) {
1286         t = (low+high)/2;
1287         if (rp[t] > bcol) high = t;
1288         else             low  = t;
1289       }
1290       for (i=low; i<high; i++) {
1291         if (rp[i] > bcol) break;
1292         if (rp[i] == bcol) {
1293           *v++ = ap[bs2*i+bs*cidx+ridx];
1294           goto finished;
1295         }
1296       }
1297       *v++ = zero;
1298       finished:;
1299     }
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #if defined(PETSC_USE_MAT_SINGLE)
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1307 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
1308 {
1309   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1310   int         ierr,i,N = m*n*b->bs2;
1311   MatScalar   *vsingle;
1312 
1313   PetscFunctionBegin;
1314   if (N > b->setvalueslen) {
1315     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1316     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1317     b->setvalueslen  = N;
1318   }
1319   vsingle = b->setvaluescopy;
1320   for (i=0; i<N; i++) {
1321     vsingle[i] = v[i];
1322   }
1323   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 #endif
1327 
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1331 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
1332 {
1333   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1334   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1335   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1336   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
1337   PetscTruth        roworiented=a->roworiented;
1338   const MatScalar   *value = v;
1339   MatScalar         *ap,*aa = a->a,*bap;
1340 
1341   PetscFunctionBegin;
1342   if (roworiented) {
1343     stepval = (n-1)*bs;
1344   } else {
1345     stepval = (m-1)*bs;
1346   }
1347   for (k=0; k<m; k++) { /* loop over added rows */
1348     row  = im[k];
1349     if (row < 0) continue;
1350 #if defined(PETSC_USE_BOPT_g)
1351     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1352 #endif
1353     rp   = aj + ai[row];
1354     ap   = aa + bs2*ai[row];
1355     rmax = imax[row];
1356     nrow = ailen[row];
1357     low  = 0;
1358     for (l=0; l<n; l++) { /* loop over added columns */
1359       if (in[l] < 0) continue;
1360 #if defined(PETSC_USE_BOPT_g)
1361       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1362 #endif
1363       col = in[l];
1364       if (roworiented) {
1365         value = v + k*(stepval+bs)*bs + l*bs;
1366       } else {
1367         value = v + l*(stepval+bs)*bs + k*bs;
1368       }
1369       if (!sorted) low = 0; high = nrow;
1370       while (high-low > 7) {
1371         t = (low+high)/2;
1372         if (rp[t] > col) high = t;
1373         else             low  = t;
1374       }
1375       for (i=low; i<high; i++) {
1376         if (rp[i] > col) break;
1377         if (rp[i] == col) {
1378           bap  = ap +  bs2*i;
1379           if (roworiented) {
1380             if (is == ADD_VALUES) {
1381               for (ii=0; ii<bs; ii++,value+=stepval) {
1382                 for (jj=ii; jj<bs2; jj+=bs) {
1383                   bap[jj] += *value++;
1384                 }
1385               }
1386             } else {
1387               for (ii=0; ii<bs; ii++,value+=stepval) {
1388                 for (jj=ii; jj<bs2; jj+=bs) {
1389                   bap[jj] = *value++;
1390                 }
1391               }
1392             }
1393           } else {
1394             if (is == ADD_VALUES) {
1395               for (ii=0; ii<bs; ii++,value+=stepval) {
1396                 for (jj=0; jj<bs; jj++) {
1397                   *bap++ += *value++;
1398                 }
1399               }
1400             } else {
1401               for (ii=0; ii<bs; ii++,value+=stepval) {
1402                 for (jj=0; jj<bs; jj++) {
1403                   *bap++  = *value++;
1404                 }
1405               }
1406             }
1407           }
1408           goto noinsert2;
1409         }
1410       }
1411       if (nonew == 1) goto noinsert2;
1412       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1413       if (nrow >= rmax) {
1414         /* there is no extra room in row, therefore enlarge */
1415         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1416         MatScalar *new_a;
1417 
1418         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1419 
1420         /* malloc new storage space */
1421         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1422 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1423         new_j   = (int*)(new_a + bs2*new_nz);
1424         new_i   = new_j + new_nz;
1425 
1426         /* copy over old data into new slots */
1427         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1428         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1429         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
1430         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1431         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
1432         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1433         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1434         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1435         /* free up old matrix storage */
1436         ierr = PetscFree(a->a);CHKERRQ(ierr);
1437         if (!a->singlemalloc) {
1438           ierr = PetscFree(a->i);CHKERRQ(ierr);
1439           ierr = PetscFree(a->j);CHKERRQ(ierr);
1440         }
1441         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1442         a->singlemalloc = PETSC_TRUE;
1443 
1444         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1445         rmax = imax[row] = imax[row] + CHUNKSIZE;
1446         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1447         a->maxnz += bs2*CHUNKSIZE;
1448         a->reallocs++;
1449         a->nz++;
1450       }
1451       N = nrow++ - 1;
1452       /* shift up all the later entries in this row */
1453       for (ii=N; ii>=i; ii--) {
1454         rp[ii+1] = rp[ii];
1455         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1456       }
1457       if (N >= i) {
1458         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1459       }
1460       rp[i] = col;
1461       bap   = ap +  bs2*i;
1462       if (roworiented) {
1463         for (ii=0; ii<bs; ii++,value+=stepval) {
1464           for (jj=ii; jj<bs2; jj+=bs) {
1465             bap[jj] = *value++;
1466           }
1467         }
1468       } else {
1469         for (ii=0; ii<bs; ii++,value+=stepval) {
1470           for (jj=0; jj<bs; jj++) {
1471             *bap++  = *value++;
1472           }
1473         }
1474       }
1475       noinsert2:;
1476       low = i;
1477     }
1478     ailen[row] = nrow;
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1485 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1486 {
1487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1488   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1489   int        m = A->m,*ip,N,*ailen = a->ilen;
1490   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
1491   MatScalar  *aa = a->a,*ap;
1492 
1493   PetscFunctionBegin;
1494   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1495 
1496   if (m) rmax = ailen[0];
1497   for (i=1; i<mbs; i++) {
1498     /* move each row back by the amount of empty slots (fshift) before it*/
1499     fshift += imax[i-1] - ailen[i-1];
1500     rmax   = PetscMax(rmax,ailen[i]);
1501     if (fshift) {
1502       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1503       N = ailen[i];
1504       for (j=0; j<N; j++) {
1505         ip[j-fshift] = ip[j];
1506         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1507       }
1508     }
1509     ai[i] = ai[i-1] + ailen[i-1];
1510   }
1511   if (mbs) {
1512     fshift += imax[mbs-1] - ailen[mbs-1];
1513     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1514   }
1515   /* reset ilen and imax for each row */
1516   for (i=0; i<mbs; i++) {
1517     ailen[i] = imax[i] = ai[i+1] - ai[i];
1518   }
1519   a->nz = ai[mbs];
1520 
1521   /* diagonals may have moved, so kill the diagonal pointers */
1522   a->idiagvalid = PETSC_FALSE;
1523   if (fshift && a->diag) {
1524     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1525     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1526     a->diag = 0;
1527   }
1528   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1529   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1530   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1531   a->reallocs          = 0;
1532   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1533 
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 
1538 
1539 /*
1540    This function returns an array of flags which indicate the locations of contiguous
1541    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1542    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1543    Assume: sizes should be long enough to hold all the values.
1544 */
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1547 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1548 {
1549   int        i,j,k,row;
1550   PetscTruth flg;
1551 
1552   PetscFunctionBegin;
1553   for (i=0,j=0; i<n; j++) {
1554     row = idx[i];
1555     if (row%bs!=0) { /* Not the begining of a block */
1556       sizes[j] = 1;
1557       i++;
1558     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1559       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1560       i++;
1561     } else { /* Begining of the block, so check if the complete block exists */
1562       flg = PETSC_TRUE;
1563       for (k=1; k<bs; k++) {
1564         if (row+k != idx[i+k]) { /* break in the block */
1565           flg = PETSC_FALSE;
1566           break;
1567         }
1568       }
1569       if (flg == PETSC_TRUE) { /* No break in the bs */
1570         sizes[j] = bs;
1571         i+= bs;
1572       } else {
1573         sizes[j] = 1;
1574         i++;
1575       }
1576     }
1577   }
1578   *bs_max = j;
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1584 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1585 {
1586   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1587   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1588   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1589   PetscScalar zero = 0.0;
1590   MatScalar   *aa;
1591 
1592   PetscFunctionBegin;
1593   /* Make a copy of the IS and  sort it */
1594   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1595   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1596 
1597   /* allocate memory for rows,sizes */
1598   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1599   sizes = rows + is_n;
1600 
1601   /* copy IS values to rows, and sort them */
1602   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1603   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1604   if (baij->keepzeroedrows) {
1605     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1606     bs_max = is_n;
1607   } else {
1608     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1609   }
1610   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1611 
1612   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1613     row   = rows[j];
1614     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1615     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1616     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1617     if (sizes[i] == bs && !baij->keepzeroedrows) {
1618       if (diag) {
1619         if (baij->ilen[row/bs] > 0) {
1620           baij->ilen[row/bs]       = 1;
1621           baij->j[baij->i[row/bs]] = row/bs;
1622           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1623         }
1624         /* Now insert all the diagonal values for this bs */
1625         for (k=0; k<bs; k++) {
1626           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1627         }
1628       } else { /* (!diag) */
1629         baij->ilen[row/bs] = 0;
1630       } /* end (!diag) */
1631     } else { /* (sizes[i] != bs) */
1632 #if defined (PETSC_USE_DEBUG)
1633       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1634 #endif
1635       for (k=0; k<count; k++) {
1636         aa[0] =  zero;
1637         aa    += bs;
1638       }
1639       if (diag) {
1640         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1641       }
1642     }
1643   }
1644 
1645   ierr = PetscFree(rows);CHKERRQ(ierr);
1646   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1652 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1653 {
1654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1655   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1656   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1657   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1658   int         ridx,cidx,bs2=a->bs2,ierr;
1659   PetscTruth  roworiented=a->roworiented;
1660   MatScalar   *ap,value,*aa=a->a,*bap;
1661 
1662   PetscFunctionBegin;
1663   for (k=0; k<m; k++) { /* loop over added rows */
1664     row  = im[k]; brow = row/bs;
1665     if (row < 0) continue;
1666 #if defined(PETSC_USE_BOPT_g)
1667     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1668 #endif
1669     rp   = aj + ai[brow];
1670     ap   = aa + bs2*ai[brow];
1671     rmax = imax[brow];
1672     nrow = ailen[brow];
1673     low  = 0;
1674     for (l=0; l<n; l++) { /* loop over added columns */
1675       if (in[l] < 0) continue;
1676 #if defined(PETSC_USE_BOPT_g)
1677       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1678 #endif
1679       col = in[l]; bcol = col/bs;
1680       ridx = row % bs; cidx = col % bs;
1681       if (roworiented) {
1682         value = v[l + k*n];
1683       } else {
1684         value = v[k + l*m];
1685       }
1686       if (!sorted) low = 0; high = nrow;
1687       while (high-low > 7) {
1688         t = (low+high)/2;
1689         if (rp[t] > bcol) high = t;
1690         else              low  = t;
1691       }
1692       for (i=low; i<high; i++) {
1693         if (rp[i] > bcol) break;
1694         if (rp[i] == bcol) {
1695           bap  = ap +  bs2*i + bs*cidx + ridx;
1696           if (is == ADD_VALUES) *bap += value;
1697           else                  *bap  = value;
1698           goto noinsert1;
1699         }
1700       }
1701       if (nonew == 1) goto noinsert1;
1702       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1703       if (nrow >= rmax) {
1704         /* there is no extra room in row, therefore enlarge */
1705         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1706         MatScalar *new_a;
1707 
1708         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1709 
1710         /* Malloc new storage space */
1711         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1712 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1713         new_j   = (int*)(new_a + bs2*new_nz);
1714         new_i   = new_j + new_nz;
1715 
1716         /* copy over old data into new slots */
1717         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1718         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1719         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1720         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1721         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1722         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1723         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1724         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1725         /* free up old matrix storage */
1726         ierr = PetscFree(a->a);CHKERRQ(ierr);
1727         if (!a->singlemalloc) {
1728           ierr = PetscFree(a->i);CHKERRQ(ierr);
1729           ierr = PetscFree(a->j);CHKERRQ(ierr);
1730         }
1731         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1732         a->singlemalloc = PETSC_TRUE;
1733 
1734         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1735         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1736         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1737         a->maxnz += bs2*CHUNKSIZE;
1738         a->reallocs++;
1739         a->nz++;
1740       }
1741       N = nrow++ - 1;
1742       /* shift up all the later entries in this row */
1743       for (ii=N; ii>=i; ii--) {
1744         rp[ii+1] = rp[ii];
1745         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1746       }
1747       if (N>=i) {
1748         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1749       }
1750       rp[i]                      = bcol;
1751       ap[bs2*i + bs*cidx + ridx] = value;
1752       noinsert1:;
1753       low = i;
1754     }
1755     ailen[brow] = nrow;
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1763 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1764 {
1765   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1766   Mat         outA;
1767   int         ierr;
1768   PetscTruth  row_identity,col_identity;
1769 
1770   PetscFunctionBegin;
1771   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1772   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1773   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1774   if (!row_identity || !col_identity) {
1775     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1776   }
1777 
1778   outA          = inA;
1779   inA->factor   = FACTOR_LU;
1780 
1781   if (!a->diag) {
1782     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1783   }
1784 
1785   a->row        = row;
1786   a->col        = col;
1787   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1788   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1789 
1790   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1791   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1792   PetscLogObjectParent(inA,a->icol);
1793 
1794   /*
1795       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1796       for ILU(0) factorization with natural ordering
1797   */
1798   if (a->bs < 8) {
1799     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1800   } else {
1801     if (!a->solve_work) {
1802       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1803       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1804     }
1805   }
1806 
1807   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1808 
1809   PetscFunctionReturn(0);
1810 }
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1813 int MatPrintHelp_SeqBAIJ(Mat A)
1814 {
1815   static PetscTruth called = PETSC_FALSE;
1816   MPI_Comm          comm = A->comm;
1817   int               ierr;
1818 
1819   PetscFunctionBegin;
1820   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1821   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1822   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 EXTERN_C_BEGIN
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1829 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1830 {
1831   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1832   int         i,nz,nbs;
1833 
1834   PetscFunctionBegin;
1835   nz  = baij->maxnz/baij->bs2;
1836   nbs = baij->nbs;
1837   for (i=0; i<nz; i++) {
1838     baij->j[i] = indices[i];
1839   }
1840   baij->nz = nz;
1841   for (i=0; i<nbs; i++) {
1842     baij->ilen[i] = baij->imax[i];
1843   }
1844 
1845   PetscFunctionReturn(0);
1846 }
1847 EXTERN_C_END
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1851 /*@
1852     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1853        in the matrix.
1854 
1855   Input Parameters:
1856 +  mat - the SeqBAIJ matrix
1857 -  indices - the column indices
1858 
1859   Level: advanced
1860 
1861   Notes:
1862     This can be called if you have precomputed the nonzero structure of the
1863   matrix and want to provide it to the matrix object to improve the performance
1864   of the MatSetValues() operation.
1865 
1866     You MUST have set the correct numbers of nonzeros per row in the call to
1867   MatCreateSeqBAIJ().
1868 
1869     MUST be called before any calls to MatSetValues();
1870 
1871 @*/
1872 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1873 {
1874   int ierr,(*f)(Mat,int *);
1875 
1876   PetscFunctionBegin;
1877   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1878   PetscValidPointer(indices,2);
1879   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1880   if (f) {
1881     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1882   } else {
1883     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1884   }
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1890 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1891 {
1892   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1893   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1894   PetscReal    atmp;
1895   PetscScalar  *x,zero = 0.0;
1896   MatScalar    *aa;
1897   int          ncols,brow,krow,kcol;
1898 
1899   PetscFunctionBegin;
1900   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1901   bs   = a->bs;
1902   aa   = a->a;
1903   ai   = a->i;
1904   aj   = a->j;
1905   mbs = a->mbs;
1906 
1907   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1908   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1909   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1910   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1911   for (i=0; i<mbs; i++) {
1912     ncols = ai[1] - ai[0]; ai++;
1913     brow  = bs*i;
1914     for (j=0; j<ncols; j++){
1915       /* bcol = bs*(*aj); */
1916       for (kcol=0; kcol<bs; kcol++){
1917         for (krow=0; krow<bs; krow++){
1918           atmp = PetscAbsScalar(*aa); aa++;
1919           row = brow + krow;    /* row index */
1920           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1921           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1922         }
1923       }
1924       aj++;
1925     }
1926   }
1927   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1933 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1934 {
1935   int        ierr;
1936 
1937   PetscFunctionBegin;
1938   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1944 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1945 {
1946   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1947   PetscFunctionBegin;
1948   *array = a->a;
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1954 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1955 {
1956   PetscFunctionBegin;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscblaslapack.h"
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1963 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1964 {
1965   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1966   int          ierr,one=1,i,bs=y->bs,j,bs2;
1967 
1968   PetscFunctionBegin;
1969   if (str == SAME_NONZERO_PATTERN) {
1970     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1971   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1972     if (y->xtoy && y->XtoY != X) {
1973       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1974       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1975     }
1976     if (!y->xtoy) { /* get xtoy */
1977       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1978       y->XtoY = X;
1979     }
1980     bs2 = bs*bs;
1981     for (i=0; i<x->nz; i++) {
1982       j = 0;
1983       while (j < bs2){
1984         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1985         j++;
1986       }
1987     }
1988     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1989   } else {
1990     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1991   }
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 /* -------------------------------------------------------------------*/
1996 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1997        MatGetRow_SeqBAIJ,
1998        MatRestoreRow_SeqBAIJ,
1999        MatMult_SeqBAIJ_N,
2000 /* 4*/ MatMultAdd_SeqBAIJ_N,
2001        MatMultTranspose_SeqBAIJ,
2002        MatMultTransposeAdd_SeqBAIJ,
2003        MatSolve_SeqBAIJ_N,
2004        0,
2005        0,
2006 /*10*/ 0,
2007        MatLUFactor_SeqBAIJ,
2008        0,
2009        0,
2010        MatTranspose_SeqBAIJ,
2011 /*15*/ MatGetInfo_SeqBAIJ,
2012        MatEqual_SeqBAIJ,
2013        MatGetDiagonal_SeqBAIJ,
2014        MatDiagonalScale_SeqBAIJ,
2015        MatNorm_SeqBAIJ,
2016 /*20*/ 0,
2017        MatAssemblyEnd_SeqBAIJ,
2018        0,
2019        MatSetOption_SeqBAIJ,
2020        MatZeroEntries_SeqBAIJ,
2021 /*25*/ MatZeroRows_SeqBAIJ,
2022        MatLUFactorSymbolic_SeqBAIJ,
2023        MatLUFactorNumeric_SeqBAIJ_N,
2024        0,
2025        0,
2026 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2027        MatILUFactorSymbolic_SeqBAIJ,
2028        0,
2029        MatGetArray_SeqBAIJ,
2030        MatRestoreArray_SeqBAIJ,
2031 /*35*/ MatDuplicate_SeqBAIJ,
2032        0,
2033        0,
2034        MatILUFactor_SeqBAIJ,
2035        0,
2036 /*40*/ MatAXPY_SeqBAIJ,
2037        MatGetSubMatrices_SeqBAIJ,
2038        MatIncreaseOverlap_SeqBAIJ,
2039        MatGetValues_SeqBAIJ,
2040        0,
2041 /*45*/ MatPrintHelp_SeqBAIJ,
2042        MatScale_SeqBAIJ,
2043        0,
2044        0,
2045        0,
2046 /*50*/ MatGetBlockSize_SeqBAIJ,
2047        MatGetRowIJ_SeqBAIJ,
2048        MatRestoreRowIJ_SeqBAIJ,
2049        0,
2050        0,
2051 /*55*/ 0,
2052        0,
2053        0,
2054        0,
2055        MatSetValuesBlocked_SeqBAIJ,
2056 /*60*/ MatGetSubMatrix_SeqBAIJ,
2057        MatDestroy_SeqBAIJ,
2058        MatView_SeqBAIJ,
2059        MatGetPetscMaps_Petsc,
2060        0,
2061 /*65*/ 0,
2062        0,
2063        0,
2064        0,
2065        0,
2066 /*70*/ MatGetRowMax_SeqBAIJ,
2067        MatConvert_Basic,
2068        0,
2069        0,
2070        0,
2071 /*75*/ 0,
2072        0,
2073        0,
2074        0,
2075        0,
2076 /*80*/ 0,
2077        0,
2078        0,
2079        0,
2080 /*85*/ MatLoad_SeqBAIJ,
2081        0,
2082        0,
2083        0,
2084        0
2085 };
2086 
2087 EXTERN_C_BEGIN
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2090 int MatStoreValues_SeqBAIJ(Mat mat)
2091 {
2092   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2093   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2094   int         ierr;
2095 
2096   PetscFunctionBegin;
2097   if (aij->nonew != 1) {
2098     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2099   }
2100 
2101   /* allocate space for values if not already there */
2102   if (!aij->saved_values) {
2103     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2104   }
2105 
2106   /* copy values over */
2107   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2108   PetscFunctionReturn(0);
2109 }
2110 EXTERN_C_END
2111 
2112 EXTERN_C_BEGIN
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2115 int MatRetrieveValues_SeqBAIJ(Mat mat)
2116 {
2117   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2118   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
2119 
2120   PetscFunctionBegin;
2121   if (aij->nonew != 1) {
2122     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2123   }
2124   if (!aij->saved_values) {
2125     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2126   }
2127 
2128   /* copy values over */
2129   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 EXTERN_C_END
2133 
2134 EXTERN_C_BEGIN
2135 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2136 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2137 EXTERN_C_END
2138 
2139 EXTERN_C_BEGIN
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2142 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
2143 {
2144   Mat_SeqBAIJ *b;
2145   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
2146   PetscTruth  flg;
2147 
2148   PetscFunctionBegin;
2149 
2150   B->preallocated = PETSC_TRUE;
2151   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2152   if (nnz && newbs != bs) {
2153     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2154   }
2155   bs   = newbs;
2156 
2157   mbs  = B->m/bs;
2158   nbs  = B->n/bs;
2159   bs2  = bs*bs;
2160 
2161   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2162     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2163   }
2164 
2165   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2166   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2167   if (nnz) {
2168     for (i=0; i<mbs; i++) {
2169       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2170       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2171     }
2172   }
2173 
2174   b       = (Mat_SeqBAIJ*)B->data;
2175   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2176   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2177   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2178   if (!flg) {
2179     switch (bs) {
2180     case 1:
2181       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2182       B->ops->mult            = MatMult_SeqBAIJ_1;
2183       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2184       break;
2185     case 2:
2186       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2187       B->ops->mult            = MatMult_SeqBAIJ_2;
2188       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2189       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2190       break;
2191     case 3:
2192       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2193       B->ops->mult            = MatMult_SeqBAIJ_3;
2194       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2195       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2196       break;
2197     case 4:
2198       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2199       B->ops->mult            = MatMult_SeqBAIJ_4;
2200       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2201       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2202       break;
2203     case 5:
2204       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2205       B->ops->mult            = MatMult_SeqBAIJ_5;
2206       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2207       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2208       break;
2209     case 6:
2210       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2211       B->ops->mult            = MatMult_SeqBAIJ_6;
2212       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2213       break;
2214     case 7:
2215       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2216       B->ops->mult            = MatMult_SeqBAIJ_7;
2217       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2218       break;
2219     default:
2220       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2221       B->ops->mult            = MatMult_SeqBAIJ_N;
2222       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2223       break;
2224     }
2225   }
2226   b->bs      = bs;
2227   b->mbs     = mbs;
2228   b->nbs     = nbs;
2229   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2230   if (!nnz) {
2231     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2232     else if (nz <= 0)        nz = 1;
2233     for (i=0; i<mbs; i++) b->imax[i] = nz;
2234     nz = nz*mbs;
2235   } else {
2236     nz = 0;
2237     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2238   }
2239 
2240   /* allocate the matrix space */
2241   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2242   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2243   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2244   b->j  = (int*)(b->a + nz*bs2);
2245   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2246   b->i  = b->j + nz;
2247   b->singlemalloc = PETSC_TRUE;
2248 
2249   b->i[0] = 0;
2250   for (i=1; i<mbs+1; i++) {
2251     b->i[i] = b->i[i-1] + b->imax[i-1];
2252   }
2253 
2254   /* b->ilen will count nonzeros in each block row so far. */
2255   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2256   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2257   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2258 
2259   b->bs               = bs;
2260   b->bs2              = bs2;
2261   b->mbs              = mbs;
2262   b->nz               = 0;
2263   b->maxnz            = nz*bs2;
2264   B->info.nz_unneeded = (PetscReal)b->maxnz;
2265   PetscFunctionReturn(0);
2266 }
2267 EXTERN_C_END
2268 
2269 /*MC
2270    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2271    block sparse compressed row format.
2272 
2273    Options Database Keys:
2274 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2275 
2276   Level: beginner
2277 
2278 .seealso: MatCreateSeqBAIJ
2279 M*/
2280 
2281 EXTERN_C_BEGIN
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatCreate_SeqBAIJ"
2284 int MatCreate_SeqBAIJ(Mat B)
2285 {
2286   int         ierr,size;
2287   Mat_SeqBAIJ *b;
2288 
2289   PetscFunctionBegin;
2290   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2291   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2292 
2293   B->m = B->M = PetscMax(B->m,B->M);
2294   B->n = B->N = PetscMax(B->n,B->N);
2295   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2296   B->data = (void*)b;
2297   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2298   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2299   B->factor           = 0;
2300   B->lupivotthreshold = 1.0;
2301   B->mapping          = 0;
2302   b->row              = 0;
2303   b->col              = 0;
2304   b->icol             = 0;
2305   b->reallocs         = 0;
2306   b->saved_values     = 0;
2307 #if defined(PETSC_USE_MAT_SINGLE)
2308   b->setvalueslen     = 0;
2309   b->setvaluescopy    = PETSC_NULL;
2310 #endif
2311 
2312   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2313   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2314 
2315   b->sorted           = PETSC_FALSE;
2316   b->roworiented      = PETSC_TRUE;
2317   b->nonew            = 0;
2318   b->diag             = 0;
2319   b->solve_work       = 0;
2320   b->mult_work        = 0;
2321   B->spptr            = 0;
2322   B->info.nz_unneeded = (PetscReal)b->maxnz;
2323   b->keepzeroedrows   = PETSC_FALSE;
2324   b->xtoy              = 0;
2325   b->XtoY              = 0;
2326 
2327   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2328                                      "MatStoreValues_SeqBAIJ",
2329                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2330   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2331                                      "MatRetrieveValues_SeqBAIJ",
2332                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2333   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2334                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2335                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2336   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2337                                      "MatConvert_SeqBAIJ_SeqAIJ",
2338                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2340                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2341                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2343                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2344                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 EXTERN_C_END
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2351 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2352 {
2353   Mat         C;
2354   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2355   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
2356 
2357   PetscFunctionBegin;
2358   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2359 
2360   *B = 0;
2361   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2362   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2363   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2364   c    = (Mat_SeqBAIJ*)C->data;
2365 
2366   C->M   = A->M;
2367   C->N   = A->N;
2368   c->bs  = a->bs;
2369   c->bs2 = a->bs2;
2370   c->mbs = a->mbs;
2371   c->nbs = a->nbs;
2372 
2373   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
2374   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
2375   for (i=0; i<mbs; i++) {
2376     c->imax[i] = a->imax[i];
2377     c->ilen[i] = a->ilen[i];
2378   }
2379 
2380   /* allocate the matrix space */
2381   c->singlemalloc = PETSC_TRUE;
2382   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
2383   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2384   c->j = (int*)(c->a + nz*bs2);
2385   c->i = c->j + nz;
2386   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
2387   if (mbs > 0) {
2388     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
2389     if (cpvalues == MAT_COPY_VALUES) {
2390       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2391     } else {
2392       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2393     }
2394   }
2395 
2396   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2397   c->sorted      = a->sorted;
2398   c->roworiented = a->roworiented;
2399   c->nonew       = a->nonew;
2400 
2401   if (a->diag) {
2402     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
2403     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
2404     for (i=0; i<mbs; i++) {
2405       c->diag[i] = a->diag[i];
2406     }
2407   } else c->diag        = 0;
2408   c->nz                 = a->nz;
2409   c->maxnz              = a->maxnz;
2410   c->solve_work         = 0;
2411   c->mult_work          = 0;
2412   C->preallocated       = PETSC_TRUE;
2413   C->assembled          = PETSC_TRUE;
2414   *B = C;
2415   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 #undef __FUNCT__
2420 #define __FUNCT__ "MatLoad_SeqBAIJ"
2421 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2422 {
2423   Mat_SeqBAIJ  *a;
2424   Mat          B;
2425   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2426   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2427   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2428   int          *masked,nmask,tmp,bs2,ishift;
2429   PetscScalar  *aa;
2430   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2431 
2432   PetscFunctionBegin;
2433   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2434   bs2  = bs*bs;
2435 
2436   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2437   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2438   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2439   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2440   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2441   M = header[1]; N = header[2]; nz = header[3];
2442 
2443   if (header[3] < 0) {
2444     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2445   }
2446 
2447   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2448 
2449   /*
2450      This code adds extra rows to make sure the number of rows is
2451     divisible by the blocksize
2452   */
2453   mbs        = M/bs;
2454   extra_rows = bs - M + bs*(mbs);
2455   if (extra_rows == bs) extra_rows = 0;
2456   else                  mbs++;
2457   if (extra_rows) {
2458     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2459   }
2460 
2461   /* read in row lengths */
2462   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2463   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2464   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2465 
2466   /* read in column indices */
2467   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
2468   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2469   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2470 
2471   /* loop over row lengths determining block row lengths */
2472   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
2473   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
2474   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
2475   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
2476   masked   = mask + mbs;
2477   rowcount = 0; nzcount = 0;
2478   for (i=0; i<mbs; i++) {
2479     nmask = 0;
2480     for (j=0; j<bs; j++) {
2481       kmax = rowlengths[rowcount];
2482       for (k=0; k<kmax; k++) {
2483         tmp = jj[nzcount++]/bs;
2484         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2485       }
2486       rowcount++;
2487     }
2488     browlengths[i] += nmask;
2489     /* zero out the mask elements we set */
2490     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2491   }
2492 
2493   /* create our matrix */
2494   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2495   ierr = MatSetType(B,type);CHKERRQ(ierr);
2496   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2497   a = (Mat_SeqBAIJ*)B->data;
2498 
2499   /* set matrix "i" values */
2500   a->i[0] = 0;
2501   for (i=1; i<= mbs; i++) {
2502     a->i[i]      = a->i[i-1] + browlengths[i-1];
2503     a->ilen[i-1] = browlengths[i-1];
2504   }
2505   a->nz         = 0;
2506   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2507 
2508   /* read in nonzero values */
2509   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2510   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2511   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2512 
2513   /* set "a" and "j" values into matrix */
2514   nzcount = 0; jcount = 0;
2515   for (i=0; i<mbs; i++) {
2516     nzcountb = nzcount;
2517     nmask    = 0;
2518     for (j=0; j<bs; j++) {
2519       kmax = rowlengths[i*bs+j];
2520       for (k=0; k<kmax; k++) {
2521         tmp = jj[nzcount++]/bs;
2522 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2523       }
2524     }
2525     /* sort the masked values */
2526     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2527 
2528     /* set "j" values into matrix */
2529     maskcount = 1;
2530     for (j=0; j<nmask; j++) {
2531       a->j[jcount++]  = masked[j];
2532       mask[masked[j]] = maskcount++;
2533     }
2534     /* set "a" values into matrix */
2535     ishift = bs2*a->i[i];
2536     for (j=0; j<bs; j++) {
2537       kmax = rowlengths[i*bs+j];
2538       for (k=0; k<kmax; k++) {
2539         tmp       = jj[nzcountb]/bs ;
2540         block     = mask[tmp] - 1;
2541         point     = jj[nzcountb] - bs*tmp;
2542         idx       = ishift + bs2*block + j + bs*point;
2543         a->a[idx] = (MatScalar)aa[nzcountb++];
2544       }
2545     }
2546     /* zero out the mask elements we set */
2547     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2548   }
2549   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2550 
2551   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2552   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2553   ierr = PetscFree(aa);CHKERRQ(ierr);
2554   ierr = PetscFree(jj);CHKERRQ(ierr);
2555   ierr = PetscFree(mask);CHKERRQ(ierr);
2556 
2557   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2558   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2559   ierr = MatView_Private(B);CHKERRQ(ierr);
2560 
2561   *A = B;
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatCreateSeqBAIJ"
2567 /*@C
2568    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2569    compressed row) format.  For good matrix assembly performance the
2570    user should preallocate the matrix storage by setting the parameter nz
2571    (or the array nnz).  By setting these parameters accurately, performance
2572    during matrix assembly can be increased by more than a factor of 50.
2573 
2574    Collective on MPI_Comm
2575 
2576    Input Parameters:
2577 +  comm - MPI communicator, set to PETSC_COMM_SELF
2578 .  bs - size of block
2579 .  m - number of rows
2580 .  n - number of columns
2581 .  nz - number of nonzero blocks  per block row (same for all rows)
2582 -  nnz - array containing the number of nonzero blocks in the various block rows
2583          (possibly different for each block row) or PETSC_NULL
2584 
2585    Output Parameter:
2586 .  A - the matrix
2587 
2588    Options Database Keys:
2589 .   -mat_no_unroll - uses code that does not unroll the loops in the
2590                      block calculations (much slower)
2591 .    -mat_block_size - size of the blocks to use
2592 
2593    Level: intermediate
2594 
2595    Notes:
2596    A nonzero block is any block that as 1 or more nonzeros in it
2597 
2598    The block AIJ format is fully compatible with standard Fortran 77
2599    storage.  That is, the stored row and column indices can begin at
2600    either one (as in Fortran) or zero.  See the users' manual for details.
2601 
2602    Specify the preallocated storage with either nz or nnz (not both).
2603    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2604    allocation.  For additional details, see the users manual chapter on
2605    matrices.
2606 
2607 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2608 @*/
2609 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2610 {
2611   int ierr;
2612 
2613   PetscFunctionBegin;
2614   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2615   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2616   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 #undef __FUNCT__
2621 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2622 /*@C
2623    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2624    per row in the matrix. For good matrix assembly performance the
2625    user should preallocate the matrix storage by setting the parameter nz
2626    (or the array nnz).  By setting these parameters accurately, performance
2627    during matrix assembly can be increased by more than a factor of 50.
2628 
2629    Collective on MPI_Comm
2630 
2631    Input Parameters:
2632 +  A - the matrix
2633 .  bs - size of block
2634 .  nz - number of block nonzeros per block row (same for all rows)
2635 -  nnz - array containing the number of block nonzeros in the various block rows
2636          (possibly different for each block row) or PETSC_NULL
2637 
2638    Options Database Keys:
2639 .   -mat_no_unroll - uses code that does not unroll the loops in the
2640                      block calculations (much slower)
2641 .    -mat_block_size - size of the blocks to use
2642 
2643    Level: intermediate
2644 
2645    Notes:
2646    The block AIJ format is fully compatible with standard Fortran 77
2647    storage.  That is, the stored row and column indices can begin at
2648    either one (as in Fortran) or zero.  See the users' manual for details.
2649 
2650    Specify the preallocated storage with either nz or nnz (not both).
2651    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2652    allocation.  For additional details, see the users manual chapter on
2653    matrices.
2654 
2655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2656 @*/
2657 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2658 {
2659   int ierr,(*f)(Mat,int,int,const int[]);
2660 
2661   PetscFunctionBegin;
2662   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2663   if (f) {
2664     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2665   }
2666   PetscFunctionReturn(0);
2667 }
2668 
2669