xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 72ce74bd30d5d09d1371a3eb387704f4af395d1a)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
16   int         *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr;
17   PetscScalar *v = a->a,*odiag,*diag,*mdiag;
18 
19   PetscFunctionBegin;
20   if (a->idiagvalid) PetscFunctionReturn(0);
21   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
22   diag_offset = a->diag;
23   if (!a->idiag) {
24     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
25   }
26   diag  = a->idiag;
27   mdiag = a->idiag+bs*bs*mbs;
28   /* factor and invert each block */
29   switch (a->bs){
30     case 2:
31       for (i=0; i<mbs; i++) {
32         odiag   = v + 4*diag_offset[i];
33         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
34 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
35 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
36 	diag    += 4;
37 	mdiag   += 4;
38       }
39       break;
40     case 3:
41       for (i=0; i<mbs; i++) {
42         odiag    = v + 9*diag_offset[i];
43         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
44         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
45         diag[8]  = odiag[8];
46         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
47         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
48         mdiag[8] = odiag[8];
49 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
50 	diag    += 9;
51 	mdiag   += 9;
52       }
53       break;
54     case 4:
55       for (i=0; i<mbs; i++) {
56         odiag  = v + 16*diag_offset[i];
57         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
58         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
60 	diag  += 16;
61 	mdiag += 16;
62       }
63       break;
64     case 5:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 25*diag_offset[i];
67         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
68         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
70 	diag  += 25;
71 	mdiag += 25;
72       }
73       break;
74     default:
75       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs);
76   }
77   a->idiagvalid = PETSC_TRUE;
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
83 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
84 {
85   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
86   PetscScalar        *x,x1,x2,s1,s2;
87   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
88   PetscErrorCode ierr;
89   int                m = a->mbs,i,i2,nz,idx;
90   const int          *diag,*ai = a->i,*aj = a->j,*vi;
91 
92   PetscFunctionBegin;
93   its = its*lits;
94   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
95   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
96   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
97   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
98   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
99 
100   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
101 
102   diag  = a->diag;
103   idiag = a->idiag;
104   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
105   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
106 
107   if (flag & SOR_ZERO_INITIAL_GUESS) {
108     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
109       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
110       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
111       i2     = 2;
112       idiag += 4;
113       for (i=1; i<m; i++) {
114 	v     = aa + 4*ai[i];
115 	vi    = aj + ai[i];
116 	nz    = diag[i] - ai[i];
117 	s1    = b[i2]; s2 = b[i2+1];
118 	while (nz--) {
119 	  idx  = 2*(*vi++);
120 	  x1   = x[idx]; x2 = x[1+idx];
121 	  s1  -= v[0]*x1 + v[2]*x2;
122 	  s2  -= v[1]*x1 + v[3]*x2;
123 	  v   += 4;
124 	}
125 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
126 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
127         idiag   += 4;
128         i2      += 2;
129       }
130       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
131       PetscLogFlops(4*(a->nz));
132     }
133     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
134         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
135       i2    = 0;
136       mdiag = a->idiag+4*a->mbs;
137       for (i=0; i<m; i++) {
138         x1      = x[i2]; x2 = x[i2+1];
139         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
140         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
141         mdiag  += 4;
142         i2     += 2;
143       }
144       PetscLogFlops(6*m);
145     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
146       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
147     }
148     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
149       idiag   = a->idiag+4*a->mbs - 4;
150       i2      = 2*m - 2;
151       x1      = x[i2]; x2 = x[i2+1];
152       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
153       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
154       idiag -= 4;
155       i2    -= 2;
156       for (i=m-2; i>=0; i--) {
157 	v     = aa + 4*(diag[i]+1);
158 	vi    = aj + diag[i] + 1;
159 	nz    = ai[i+1] - diag[i] - 1;
160 	s1    = x[i2]; s2 = x[i2+1];
161 	while (nz--) {
162 	  idx  = 2*(*vi++);
163 	  x1   = x[idx]; x2 = x[1+idx];
164 	  s1  -= v[0]*x1 + v[2]*x2;
165 	  s2  -= v[1]*x1 + v[3]*x2;
166 	  v   += 4;
167 	}
168 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
169 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
170         idiag   -= 4;
171         i2      -= 2;
172       }
173       PetscLogFlops(4*(a->nz));
174     }
175   } else {
176     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
177   }
178   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
179   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
185 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
186 {
187   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
188   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
189   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
190   PetscErrorCode ierr;
191   int                m = a->mbs,i,i2,nz,idx;
192   const int          *diag,*ai = a->i,*aj = a->j,*vi;
193 
194   PetscFunctionBegin;
195   its = its*lits;
196   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
197   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
198   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
199   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
200   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
201 
202   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
203 
204   diag  = a->diag;
205   idiag = a->idiag;
206   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
207   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
208 
209   if (flag & SOR_ZERO_INITIAL_GUESS) {
210     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
211       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
212       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
213       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
214       i2     = 3;
215       idiag += 9;
216       for (i=1; i<m; i++) {
217 	v     = aa + 9*ai[i];
218 	vi    = aj + ai[i];
219 	nz    = diag[i] - ai[i];
220 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
221 	while (nz--) {
222 	  idx  = 3*(*vi++);
223 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
224 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
225 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
226 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
227 	  v   += 9;
228 	}
229 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
230 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
231 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
232         idiag   += 9;
233         i2      += 3;
234       }
235       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
236       PetscLogFlops(9*(a->nz));
237     }
238     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
239         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
240       i2    = 0;
241       mdiag = a->idiag+9*a->mbs;
242       for (i=0; i<m; i++) {
243         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
244         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
245         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
246         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
247         mdiag  += 9;
248         i2     += 3;
249       }
250       PetscLogFlops(15*m);
251     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
252       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
253     }
254     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
255       idiag   = a->idiag+9*a->mbs - 9;
256       i2      = 3*m - 3;
257       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
258       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
259       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
260       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
261       idiag -= 9;
262       i2    -= 3;
263       for (i=m-2; i>=0; i--) {
264 	v     = aa + 9*(diag[i]+1);
265 	vi    = aj + diag[i] + 1;
266 	nz    = ai[i+1] - diag[i] - 1;
267 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
268 	while (nz--) {
269 	  idx  = 3*(*vi++);
270 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
271 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
272 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
273 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
274 	  v   += 9;
275 	}
276 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
277 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
278 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
279         idiag   -= 9;
280         i2      -= 3;
281       }
282       PetscLogFlops(9*(a->nz));
283     }
284   } else {
285     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
286   }
287   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
288   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
294 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
295 {
296   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
297   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
298   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
299   PetscErrorCode ierr;
300   int                m = a->mbs,i,i2,nz,idx;
301   const int          *diag,*ai = a->i,*aj = a->j,*vi;
302 
303   PetscFunctionBegin;
304   its = its*lits;
305   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
306   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
307   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
308   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
309   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
310 
311   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
312 
313   diag  = a->diag;
314   idiag = a->idiag;
315   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
316   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
317 
318   if (flag & SOR_ZERO_INITIAL_GUESS) {
319     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
320       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
321       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
322       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
323       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
324       i2     = 4;
325       idiag += 16;
326       for (i=1; i<m; i++) {
327 	v     = aa + 16*ai[i];
328 	vi    = aj + ai[i];
329 	nz    = diag[i] - ai[i];
330 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
331 	while (nz--) {
332 	  idx  = 4*(*vi++);
333 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
334 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
335 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
336 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
337 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
338 	  v   += 16;
339 	}
340 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
341 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
342 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
343 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
344         idiag   += 16;
345         i2      += 4;
346       }
347       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
348       PetscLogFlops(16*(a->nz));
349     }
350     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
351         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
352       i2    = 0;
353       mdiag = a->idiag+16*a->mbs;
354       for (i=0; i<m; i++) {
355         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
356         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
357         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
358         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
359         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
360         mdiag  += 16;
361         i2     += 4;
362       }
363       PetscLogFlops(28*m);
364     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
365       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
366     }
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
368       idiag   = a->idiag+16*a->mbs - 16;
369       i2      = 4*m - 4;
370       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
371       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
372       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
373       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
374       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
375       idiag -= 16;
376       i2    -= 4;
377       for (i=m-2; i>=0; i--) {
378 	v     = aa + 16*(diag[i]+1);
379 	vi    = aj + diag[i] + 1;
380 	nz    = ai[i+1] - diag[i] - 1;
381 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
382 	while (nz--) {
383 	  idx  = 4*(*vi++);
384 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
385 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
386 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
387 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
388 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
389 	  v   += 16;
390 	}
391 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
392 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
393 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
394 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
395         idiag   -= 16;
396         i2      -= 4;
397       }
398       PetscLogFlops(16*(a->nz));
399     }
400   } else {
401     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
402   }
403   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
404   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
410 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
411 {
412   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
413   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
414   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
415   PetscErrorCode ierr;
416   int                m = a->mbs,i,i2,nz,idx;
417   const int          *diag,*ai = a->i,*aj = a->j,*vi;
418 
419   PetscFunctionBegin;
420   its = its*lits;
421   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
422   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
423   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
424   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
425   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
426 
427   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
428 
429   diag  = a->diag;
430   idiag = a->idiag;
431   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
432   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
433 
434   if (flag & SOR_ZERO_INITIAL_GUESS) {
435     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
436       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
437       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
438       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
439       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
440       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
441       i2     = 5;
442       idiag += 25;
443       for (i=1; i<m; i++) {
444 	v     = aa + 25*ai[i];
445 	vi    = aj + ai[i];
446 	nz    = diag[i] - ai[i];
447 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
448 	while (nz--) {
449 	  idx  = 5*(*vi++);
450 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
451 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
452 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
453 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
454 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
455 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
456 	  v   += 25;
457 	}
458 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
459 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
460 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
461 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
462 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
463         idiag   += 25;
464         i2      += 5;
465       }
466       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
467       PetscLogFlops(25*(a->nz));
468     }
469     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
470         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
471       i2    = 0;
472       mdiag = a->idiag+25*a->mbs;
473       for (i=0; i<m; i++) {
474         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
475         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
476         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
477         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
478         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
479         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
480         mdiag  += 25;
481         i2     += 5;
482       }
483       PetscLogFlops(45*m);
484     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
485       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
486     }
487     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
488       idiag   = a->idiag+25*a->mbs - 25;
489       i2      = 5*m - 5;
490       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
491       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
492       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
493       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
494       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
495       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
496       idiag -= 25;
497       i2    -= 5;
498       for (i=m-2; i>=0; i--) {
499 	v     = aa + 25*(diag[i]+1);
500 	vi    = aj + diag[i] + 1;
501 	nz    = ai[i+1] - diag[i] - 1;
502 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
503 	while (nz--) {
504 	  idx  = 5*(*vi++);
505 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
506 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
507 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
508 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
509 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
510 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
511 	  v   += 25;
512 	}
513 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
514 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
515 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
516 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
517 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
518         idiag   -= 25;
519         i2      -= 5;
520       }
521       PetscLogFlops(25*(a->nz));
522     }
523   } else {
524     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
525   }
526   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
527   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 /*
532     Special version for Fun3d sequential benchmark
533 */
534 #if defined(PETSC_HAVE_FORTRAN_CAPS)
535 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
536 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
537 #define matsetvaluesblocked4_ matsetvaluesblocked4
538 #endif
539 
540 EXTERN_C_BEGIN
541 #undef __FUNCT__
542 #define __FUNCT__ "matsetvaluesblocked4_"
543 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
544 {
545   Mat               A = *AA;
546   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
547   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
548   int               *ai=a->i,*ailen=a->ilen;
549   int               *aj=a->j,stepval;
550   const PetscScalar *value = v;
551   MatScalar         *ap,*aa = a->a,*bap;
552 
553   PetscFunctionBegin;
554   stepval = (n-1)*4;
555   for (k=0; k<m; k++) { /* loop over added rows */
556     row  = im[k];
557     rp   = aj + ai[row];
558     ap   = aa + 16*ai[row];
559     nrow = ailen[row];
560     low  = 0;
561     for (l=0; l<n; l++) { /* loop over added columns */
562       col = in[l];
563       value = v + k*(stepval+4)*4 + l*4;
564       low = 0; high = nrow;
565       while (high-low > 7) {
566         t = (low+high)/2;
567         if (rp[t] > col) high = t;
568         else             low  = t;
569       }
570       for (i=low; i<high; i++) {
571         if (rp[i] > col) break;
572         if (rp[i] == col) {
573           bap  = ap +  16*i;
574           for (ii=0; ii<4; ii++,value+=stepval) {
575             for (jj=ii; jj<16; jj+=4) {
576               bap[jj] += *value++;
577             }
578           }
579           goto noinsert2;
580         }
581       }
582       N = nrow++ - 1;
583       /* shift up all the later entries in this row */
584       for (ii=N; ii>=i; ii--) {
585         rp[ii+1] = rp[ii];
586         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
587       }
588       if (N >= i) {
589         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
590       }
591       rp[i] = col;
592       bap   = ap +  16*i;
593       for (ii=0; ii<4; ii++,value+=stepval) {
594         for (jj=ii; jj<16; jj+=4) {
595           bap[jj] = *value++;
596         }
597       }
598       noinsert2:;
599       low = i;
600     }
601     ailen[row] = nrow;
602   }
603 }
604 EXTERN_C_END
605 
606 #if defined(PETSC_HAVE_FORTRAN_CAPS)
607 #define matsetvalues4_ MATSETVALUES4
608 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
609 #define matsetvalues4_ matsetvalues4
610 #endif
611 
612 EXTERN_C_BEGIN
613 #undef __FUNCT__
614 #define __FUNCT__ "MatSetValues4_"
615 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
616 {
617   Mat         A = *AA;
618   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
619   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
620   int         *ai=a->i,*ailen=a->ilen;
621   int         *aj=a->j,brow,bcol;
622   int         ridx,cidx;
623   MatScalar   *ap,value,*aa=a->a,*bap;
624 
625   PetscFunctionBegin;
626   for (k=0; k<m; k++) { /* loop over added rows */
627     row  = im[k]; brow = row/4;
628     rp   = aj + ai[brow];
629     ap   = aa + 16*ai[brow];
630     nrow = ailen[brow];
631     low  = 0;
632     for (l=0; l<n; l++) { /* loop over added columns */
633       col = in[l]; bcol = col/4;
634       ridx = row % 4; cidx = col % 4;
635       value = v[l + k*n];
636       low = 0; high = nrow;
637       while (high-low > 7) {
638         t = (low+high)/2;
639         if (rp[t] > bcol) high = t;
640         else              low  = t;
641       }
642       for (i=low; i<high; i++) {
643         if (rp[i] > bcol) break;
644         if (rp[i] == bcol) {
645           bap  = ap +  16*i + 4*cidx + ridx;
646           *bap += value;
647           goto noinsert1;
648         }
649       }
650       N = nrow++ - 1;
651       /* shift up all the later entries in this row */
652       for (ii=N; ii>=i; ii--) {
653         rp[ii+1] = rp[ii];
654         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
655       }
656       if (N>=i) {
657         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
658       }
659       rp[i]                    = bcol;
660       ap[16*i + 4*cidx + ridx] = value;
661       noinsert1:;
662       low = i;
663     }
664     ailen[brow] = nrow;
665   }
666 }
667 EXTERN_C_END
668 
669 /*  UGLY, ugly, ugly
670    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
671    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
672    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
673    converts the entries into single precision and then calls ..._MatScalar() to put them
674    into the single precision data structures.
675 */
676 #if defined(PETSC_USE_MAT_SINGLE)
677 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
678 #else
679 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
680 #endif
681 
682 #define CHUNKSIZE  10
683 
684 /*
685      Checks for missing diagonals
686 */
687 #undef __FUNCT__
688 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
689 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
690 {
691   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
692   int         *diag,*jj = a->j,i,ierr;
693 
694   PetscFunctionBegin;
695   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
696   diag = a->diag;
697   for (i=0; i<a->mbs; i++) {
698     if (jj[diag[i]] != i) {
699       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i);
700     }
701   }
702   PetscFunctionReturn(0);
703 }
704 
705 #undef __FUNCT__
706 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
707 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
708 {
709   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
710   int         i,j,*diag,m = a->mbs,ierr;
711 
712   PetscFunctionBegin;
713   if (a->diag) PetscFunctionReturn(0);
714 
715   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
716   PetscLogObjectMemory(A,(m+1)*sizeof(int));
717   for (i=0; i<m; i++) {
718     diag[i] = a->i[i+1];
719     for (j=a->i[i]; j<a->i[i+1]; j++) {
720       if (a->j[j] == i) {
721         diag[i] = j;
722         break;
723       }
724     }
725   }
726   a->diag = diag;
727   PetscFunctionReturn(0);
728 }
729 
730 
731 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
732 
733 #undef __FUNCT__
734 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
735 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
736 {
737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
738   PetscErrorCode ierr;
739   int         n = a->mbs,i;
740 
741   PetscFunctionBegin;
742   *nn = n;
743   if (!ia) PetscFunctionReturn(0);
744   if (symmetric) {
745     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
746   } else if (oshift == 1) {
747     /* temporarily add 1 to i and j indices */
748     int nz = a->i[n];
749     for (i=0; i<nz; i++) a->j[i]++;
750     for (i=0; i<n+1; i++) a->i[i]++;
751     *ia = a->i; *ja = a->j;
752   } else {
753     *ia = a->i; *ja = a->j;
754   }
755 
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
761 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
762 {
763   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
764   int         i,n = a->mbs,ierr;
765 
766   PetscFunctionBegin;
767   if (!ia) PetscFunctionReturn(0);
768   if (symmetric) {
769     ierr = PetscFree(*ia);CHKERRQ(ierr);
770     ierr = PetscFree(*ja);CHKERRQ(ierr);
771   } else if (oshift == 1) {
772     int nz = a->i[n]-1;
773     for (i=0; i<nz; i++) a->j[i]--;
774     for (i=0; i<n+1; i++) a->i[i]--;
775   }
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
781 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
782 {
783   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
784 
785   PetscFunctionBegin;
786   *bs = baij->bs;
787   PetscFunctionReturn(0);
788 }
789 
790 
791 #undef __FUNCT__
792 #define __FUNCT__ "MatDestroy_SeqBAIJ"
793 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
794 {
795   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
796   PetscErrorCode ierr;
797 
798   PetscFunctionBegin;
799 #if defined(PETSC_USE_LOG)
800   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
801 #endif
802   ierr = PetscFree(a->a);CHKERRQ(ierr);
803   if (!a->singlemalloc) {
804     ierr = PetscFree(a->i);CHKERRQ(ierr);
805     ierr = PetscFree(a->j);CHKERRQ(ierr);
806   }
807   if (a->row) {
808     ierr = ISDestroy(a->row);CHKERRQ(ierr);
809   }
810   if (a->col) {
811     ierr = ISDestroy(a->col);CHKERRQ(ierr);
812   }
813   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
814   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
815   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
816   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
817   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
818   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
819   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
820   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
821 #if defined(PETSC_USE_MAT_SINGLE)
822   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
823 #endif
824   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
825 
826   ierr = PetscFree(a);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatSetOption_SeqBAIJ"
832 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
833 {
834   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
835 
836   PetscFunctionBegin;
837   switch (op) {
838   case MAT_ROW_ORIENTED:
839     a->roworiented    = PETSC_TRUE;
840     break;
841   case MAT_COLUMN_ORIENTED:
842     a->roworiented    = PETSC_FALSE;
843     break;
844   case MAT_COLUMNS_SORTED:
845     a->sorted         = PETSC_TRUE;
846     break;
847   case MAT_COLUMNS_UNSORTED:
848     a->sorted         = PETSC_FALSE;
849     break;
850   case MAT_KEEP_ZEROED_ROWS:
851     a->keepzeroedrows = PETSC_TRUE;
852     break;
853   case MAT_NO_NEW_NONZERO_LOCATIONS:
854     a->nonew          = 1;
855     break;
856   case MAT_NEW_NONZERO_LOCATION_ERR:
857     a->nonew          = -1;
858     break;
859   case MAT_NEW_NONZERO_ALLOCATION_ERR:
860     a->nonew          = -2;
861     break;
862   case MAT_YES_NEW_NONZERO_LOCATIONS:
863     a->nonew          = 0;
864     break;
865   case MAT_ROWS_SORTED:
866   case MAT_ROWS_UNSORTED:
867   case MAT_YES_NEW_DIAGONALS:
868   case MAT_IGNORE_OFF_PROC_ENTRIES:
869   case MAT_USE_HASH_TABLE:
870     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
871     break;
872   case MAT_NO_NEW_DIAGONALS:
873     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
874   case MAT_SYMMETRIC:
875   case MAT_STRUCTURALLY_SYMMETRIC:
876   case MAT_NOT_SYMMETRIC:
877   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
878   case MAT_HERMITIAN:
879   case MAT_NOT_HERMITIAN:
880   case MAT_SYMMETRY_ETERNAL:
881   case MAT_NOT_SYMMETRY_ETERNAL:
882     break;
883   default:
884     SETERRQ(PETSC_ERR_SUP,"unknown option");
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatGetRow_SeqBAIJ"
891 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
892 {
893   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
894   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
895   MatScalar    *aa,*aa_i;
896   PetscScalar  *v_i;
897 
898   PetscFunctionBegin;
899   bs  = a->bs;
900   ai  = a->i;
901   aj  = a->j;
902   aa  = a->a;
903   bs2 = a->bs2;
904 
905   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
906 
907   bn  = row/bs;   /* Block number */
908   bp  = row % bs; /* Block Position */
909   M   = ai[bn+1] - ai[bn];
910   *nz = bs*M;
911 
912   if (v) {
913     *v = 0;
914     if (*nz) {
915       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
916       for (i=0; i<M; i++) { /* for each block in the block row */
917         v_i  = *v + i*bs;
918         aa_i = aa + bs2*(ai[bn] + i);
919         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
920       }
921     }
922   }
923 
924   if (idx) {
925     *idx = 0;
926     if (*nz) {
927       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
928       for (i=0; i<M; i++) { /* for each block in the block row */
929         idx_i = *idx + i*bs;
930         itmp  = bs*aj[ai[bn] + i];
931         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
932       }
933     }
934   }
935   PetscFunctionReturn(0);
936 }
937 
938 #undef __FUNCT__
939 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
940 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
941 {
942   PetscErrorCode ierr;
943 
944   PetscFunctionBegin;
945   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
946   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatTranspose_SeqBAIJ"
952 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
953 {
954   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
955   Mat         C;
956   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
957   int         *rows,*cols,bs2=a->bs2;
958   PetscScalar *array;
959 
960   PetscFunctionBegin;
961   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
962   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
963   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
964 
965 #if defined(PETSC_USE_MAT_SINGLE)
966   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
967   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
968 #else
969   array = a->a;
970 #endif
971 
972   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
973   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
974   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
975   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
976   ierr = PetscFree(col);CHKERRQ(ierr);
977   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
978   cols = rows + bs;
979   for (i=0; i<mbs; i++) {
980     cols[0] = i*bs;
981     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
982     len = ai[i+1] - ai[i];
983     for (j=0; j<len; j++) {
984       rows[0] = (*aj++)*bs;
985       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
986       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
987       array += bs2;
988     }
989   }
990   ierr = PetscFree(rows);CHKERRQ(ierr);
991 #if defined(PETSC_USE_MAT_SINGLE)
992   ierr = PetscFree(array);
993 #endif
994 
995   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
996   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
997 
998   if (B) {
999     *B = C;
1000   } else {
1001     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1008 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1009 {
1010   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1011   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1012   PetscScalar *aa;
1013   FILE        *file;
1014 
1015   PetscFunctionBegin;
1016   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1017   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
1018   col_lens[0] = MAT_FILE_COOKIE;
1019 
1020   col_lens[1] = A->m;
1021   col_lens[2] = A->n;
1022   col_lens[3] = a->nz*bs2;
1023 
1024   /* store lengths of each row and write (including header) to file */
1025   count = 0;
1026   for (i=0; i<a->mbs; i++) {
1027     for (j=0; j<bs; j++) {
1028       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1029     }
1030   }
1031   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
1032   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1033 
1034   /* store column indices (zero start index) */
1035   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
1036   count = 0;
1037   for (i=0; i<a->mbs; i++) {
1038     for (j=0; j<bs; j++) {
1039       for (k=a->i[i]; k<a->i[i+1]; k++) {
1040         for (l=0; l<bs; l++) {
1041           jj[count++] = bs*a->j[k] + l;
1042         }
1043       }
1044     }
1045   }
1046   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
1047   ierr = PetscFree(jj);CHKERRQ(ierr);
1048 
1049   /* store nonzero values */
1050   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1051   count = 0;
1052   for (i=0; i<a->mbs; i++) {
1053     for (j=0; j<bs; j++) {
1054       for (k=a->i[i]; k<a->i[i+1]; k++) {
1055         for (l=0; l<bs; l++) {
1056           aa[count++] = a->a[bs2*k + l*bs + j];
1057         }
1058       }
1059     }
1060   }
1061   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
1062   ierr = PetscFree(aa);CHKERRQ(ierr);
1063 
1064   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1065   if (file) {
1066     fprintf(file,"-matload_block_size %d\n",a->bs);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1073 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1074 {
1075   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1076   PetscErrorCode ierr;
1077   int               i,j,bs = a->bs,k,l,bs2=a->bs2;
1078   PetscViewerFormat format;
1079 
1080   PetscFunctionBegin;
1081   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1082   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1083     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1084   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1085     Mat aij;
1086     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1087     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1088     ierr = MatDestroy(aij);CHKERRQ(ierr);
1089   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1090      PetscFunctionReturn(0);
1091   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1092     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1093     for (i=0; i<a->mbs; i++) {
1094       for (j=0; j<bs; j++) {
1095         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1096         for (k=a->i[i]; k<a->i[i+1]; k++) {
1097           for (l=0; l<bs; l++) {
1098 #if defined(PETSC_USE_COMPLEX)
1099             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1100               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1101                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1102             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1103               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1104                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1105             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1106               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1107             }
1108 #else
1109             if (a->a[bs2*k + l*bs + j] != 0.0) {
1110               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1111             }
1112 #endif
1113           }
1114         }
1115         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1116       }
1117     }
1118     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1119   } else {
1120     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1121     for (i=0; i<a->mbs; i++) {
1122       for (j=0; j<bs; j++) {
1123         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1124         for (k=a->i[i]; k<a->i[i+1]; k++) {
1125           for (l=0; l<bs; l++) {
1126 #if defined(PETSC_USE_COMPLEX)
1127             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1128               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1129                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1130             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1131               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1132                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1133             } else {
1134               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1135             }
1136 #else
1137             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1138 #endif
1139           }
1140         }
1141         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1142       }
1143     }
1144     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1145   }
1146   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1152 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1153 {
1154   Mat          A = (Mat) Aa;
1155   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
1156   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1157   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1158   MatScalar    *aa;
1159   PetscViewer  viewer;
1160 
1161   PetscFunctionBegin;
1162 
1163   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1164   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1165 
1166   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1167 
1168   /* loop over matrix elements drawing boxes */
1169   color = PETSC_DRAW_BLUE;
1170   for (i=0,row=0; i<mbs; i++,row+=bs) {
1171     for (j=a->i[i]; j<a->i[i+1]; j++) {
1172       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1173       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1174       aa = a->a + j*bs2;
1175       for (k=0; k<bs; k++) {
1176         for (l=0; l<bs; l++) {
1177           if (PetscRealPart(*aa++) >=  0.) continue;
1178           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1179         }
1180       }
1181     }
1182   }
1183   color = PETSC_DRAW_CYAN;
1184   for (i=0,row=0; i<mbs; i++,row+=bs) {
1185     for (j=a->i[i]; j<a->i[i+1]; j++) {
1186       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1187       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1188       aa = a->a + j*bs2;
1189       for (k=0; k<bs; k++) {
1190         for (l=0; l<bs; l++) {
1191           if (PetscRealPart(*aa++) != 0.) continue;
1192           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1193         }
1194       }
1195     }
1196   }
1197 
1198   color = PETSC_DRAW_RED;
1199   for (i=0,row=0; i<mbs; i++,row+=bs) {
1200     for (j=a->i[i]; j<a->i[i+1]; j++) {
1201       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1202       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1203       aa = a->a + j*bs2;
1204       for (k=0; k<bs; k++) {
1205         for (l=0; l<bs; l++) {
1206           if (PetscRealPart(*aa++) <= 0.) continue;
1207           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1208         }
1209       }
1210     }
1211   }
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 #undef __FUNCT__
1216 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1217 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1218 {
1219   PetscErrorCode ierr;
1220   PetscReal    xl,yl,xr,yr,w,h;
1221   PetscDraw    draw;
1222   PetscTruth   isnull;
1223 
1224   PetscFunctionBegin;
1225 
1226   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1227   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1228 
1229   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1230   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1231   xr += w;    yr += h;  xl = -w;     yl = -h;
1232   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1233   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1234   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatView_SeqBAIJ"
1240 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1241 {
1242   PetscErrorCode ierr;
1243   PetscTruth iascii,isbinary,isdraw;
1244 
1245   PetscFunctionBegin;
1246   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1247   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1248   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1249   if (iascii){
1250     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1251   } else if (isbinary) {
1252     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1253   } else if (isdraw) {
1254     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1255   } else {
1256     Mat B;
1257     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1258     ierr = MatView(B,viewer);CHKERRQ(ierr);
1259     ierr = MatDestroy(B);CHKERRQ(ierr);
1260   }
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1267 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
1268 {
1269   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1270   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1271   int        *ai = a->i,*ailen = a->ilen;
1272   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1273   MatScalar  *ap,*aa = a->a,zero = 0.0;
1274 
1275   PetscFunctionBegin;
1276   for (k=0; k<m; k++) { /* loop over rows */
1277     row  = im[k]; brow = row/bs;
1278     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1279     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1280     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1281     nrow = ailen[brow];
1282     for (l=0; l<n; l++) { /* loop over columns */
1283       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1284       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1285       col  = in[l] ;
1286       bcol = col/bs;
1287       cidx = col%bs;
1288       ridx = row%bs;
1289       high = nrow;
1290       low  = 0; /* assume unsorted */
1291       while (high-low > 5) {
1292         t = (low+high)/2;
1293         if (rp[t] > bcol) high = t;
1294         else             low  = t;
1295       }
1296       for (i=low; i<high; i++) {
1297         if (rp[i] > bcol) break;
1298         if (rp[i] == bcol) {
1299           *v++ = ap[bs2*i+bs*cidx+ridx];
1300           goto finished;
1301         }
1302       }
1303       *v++ = zero;
1304       finished:;
1305     }
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #if defined(PETSC_USE_MAT_SINGLE)
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1313 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
1314 {
1315   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1316   PetscErrorCode ierr;
1317   int         i,N = m*n*b->bs2;
1318   MatScalar   *vsingle;
1319 
1320   PetscFunctionBegin;
1321   if (N > b->setvalueslen) {
1322     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1323     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1324     b->setvalueslen  = N;
1325   }
1326   vsingle = b->setvaluescopy;
1327   for (i=0; i<N; i++) {
1328     vsingle[i] = v[i];
1329   }
1330   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 #endif
1334 
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1338 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
1339 {
1340   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1341   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1342   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1343   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
1344   PetscTruth        roworiented=a->roworiented;
1345   const MatScalar   *value = v;
1346   MatScalar         *ap,*aa = a->a,*bap;
1347 
1348   PetscFunctionBegin;
1349   if (roworiented) {
1350     stepval = (n-1)*bs;
1351   } else {
1352     stepval = (m-1)*bs;
1353   }
1354   for (k=0; k<m; k++) { /* loop over added rows */
1355     row  = im[k];
1356     if (row < 0) continue;
1357 #if defined(PETSC_USE_BOPT_g)
1358     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1359 #endif
1360     rp   = aj + ai[row];
1361     ap   = aa + bs2*ai[row];
1362     rmax = imax[row];
1363     nrow = ailen[row];
1364     low  = 0;
1365     for (l=0; l<n; l++) { /* loop over added columns */
1366       if (in[l] < 0) continue;
1367 #if defined(PETSC_USE_BOPT_g)
1368       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1369 #endif
1370       col = in[l];
1371       if (roworiented) {
1372         value = v + k*(stepval+bs)*bs + l*bs;
1373       } else {
1374         value = v + l*(stepval+bs)*bs + k*bs;
1375       }
1376       if (!sorted) low = 0; high = nrow;
1377       while (high-low > 7) {
1378         t = (low+high)/2;
1379         if (rp[t] > col) high = t;
1380         else             low  = t;
1381       }
1382       for (i=low; i<high; i++) {
1383         if (rp[i] > col) break;
1384         if (rp[i] == col) {
1385           bap  = ap +  bs2*i;
1386           if (roworiented) {
1387             if (is == ADD_VALUES) {
1388               for (ii=0; ii<bs; ii++,value+=stepval) {
1389                 for (jj=ii; jj<bs2; jj+=bs) {
1390                   bap[jj] += *value++;
1391                 }
1392               }
1393             } else {
1394               for (ii=0; ii<bs; ii++,value+=stepval) {
1395                 for (jj=ii; jj<bs2; jj+=bs) {
1396                   bap[jj] = *value++;
1397                 }
1398               }
1399             }
1400           } else {
1401             if (is == ADD_VALUES) {
1402               for (ii=0; ii<bs; ii++,value+=stepval) {
1403                 for (jj=0; jj<bs; jj++) {
1404                   *bap++ += *value++;
1405                 }
1406               }
1407             } else {
1408               for (ii=0; ii<bs; ii++,value+=stepval) {
1409                 for (jj=0; jj<bs; jj++) {
1410                   *bap++  = *value++;
1411                 }
1412               }
1413             }
1414           }
1415           goto noinsert2;
1416         }
1417       }
1418       if (nonew == 1) goto noinsert2;
1419       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1420       if (nrow >= rmax) {
1421         /* there is no extra room in row, therefore enlarge */
1422         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1423         MatScalar *new_a;
1424 
1425         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1426 
1427         /* malloc new storage space */
1428         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1429 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1430         new_j   = (int*)(new_a + bs2*new_nz);
1431         new_i   = new_j + new_nz;
1432 
1433         /* copy over old data into new slots */
1434         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1435         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1436         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
1437         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1438         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
1439         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1440         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1441         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1442         /* free up old matrix storage */
1443         ierr = PetscFree(a->a);CHKERRQ(ierr);
1444         if (!a->singlemalloc) {
1445           ierr = PetscFree(a->i);CHKERRQ(ierr);
1446           ierr = PetscFree(a->j);CHKERRQ(ierr);
1447         }
1448         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1449         a->singlemalloc = PETSC_TRUE;
1450 
1451         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1452         rmax = imax[row] = imax[row] + CHUNKSIZE;
1453         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1454         a->maxnz += bs2*CHUNKSIZE;
1455         a->reallocs++;
1456         a->nz++;
1457       }
1458       N = nrow++ - 1;
1459       /* shift up all the later entries in this row */
1460       for (ii=N; ii>=i; ii--) {
1461         rp[ii+1] = rp[ii];
1462         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1463       }
1464       if (N >= i) {
1465         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1466       }
1467       rp[i] = col;
1468       bap   = ap +  bs2*i;
1469       if (roworiented) {
1470         for (ii=0; ii<bs; ii++,value+=stepval) {
1471           for (jj=ii; jj<bs2; jj+=bs) {
1472             bap[jj] = *value++;
1473           }
1474         }
1475       } else {
1476         for (ii=0; ii<bs; ii++,value+=stepval) {
1477           for (jj=0; jj<bs; jj++) {
1478             *bap++  = *value++;
1479           }
1480         }
1481       }
1482       noinsert2:;
1483       low = i;
1484     }
1485     ailen[row] = nrow;
1486   }
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1492 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1493 {
1494   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1495   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1496   int        m = A->m,*ip,N,*ailen = a->ilen;
1497   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
1498   MatScalar  *aa = a->a,*ap;
1499 
1500   PetscFunctionBegin;
1501   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1502 
1503   if (m) rmax = ailen[0];
1504   for (i=1; i<mbs; i++) {
1505     /* move each row back by the amount of empty slots (fshift) before it*/
1506     fshift += imax[i-1] - ailen[i-1];
1507     rmax   = PetscMax(rmax,ailen[i]);
1508     if (fshift) {
1509       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1510       N = ailen[i];
1511       for (j=0; j<N; j++) {
1512         ip[j-fshift] = ip[j];
1513         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1514       }
1515     }
1516     ai[i] = ai[i-1] + ailen[i-1];
1517   }
1518   if (mbs) {
1519     fshift += imax[mbs-1] - ailen[mbs-1];
1520     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1521   }
1522   /* reset ilen and imax for each row */
1523   for (i=0; i<mbs; i++) {
1524     ailen[i] = imax[i] = ai[i+1] - ai[i];
1525   }
1526   a->nz = ai[mbs];
1527 
1528   /* diagonals may have moved, so kill the diagonal pointers */
1529   a->idiagvalid = PETSC_FALSE;
1530   if (fshift && a->diag) {
1531     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1532     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1533     a->diag = 0;
1534   }
1535   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1536   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1537   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1538   a->reallocs          = 0;
1539   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1540 
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 
1545 
1546 /*
1547    This function returns an array of flags which indicate the locations of contiguous
1548    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1549    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1550    Assume: sizes should be long enough to hold all the values.
1551 */
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1554 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1555 {
1556   int        i,j,k,row;
1557   PetscTruth flg;
1558 
1559   PetscFunctionBegin;
1560   for (i=0,j=0; i<n; j++) {
1561     row = idx[i];
1562     if (row%bs!=0) { /* Not the begining of a block */
1563       sizes[j] = 1;
1564       i++;
1565     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1566       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1567       i++;
1568     } else { /* Begining of the block, so check if the complete block exists */
1569       flg = PETSC_TRUE;
1570       for (k=1; k<bs; k++) {
1571         if (row+k != idx[i+k]) { /* break in the block */
1572           flg = PETSC_FALSE;
1573           break;
1574         }
1575       }
1576       if (flg == PETSC_TRUE) { /* No break in the bs */
1577         sizes[j] = bs;
1578         i+= bs;
1579       } else {
1580         sizes[j] = 1;
1581         i++;
1582       }
1583     }
1584   }
1585   *bs_max = j;
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1591 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1592 {
1593   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595   int         i,j,k,count,is_n,*is_idx,*rows;
1596   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1597   PetscScalar zero = 0.0;
1598   MatScalar   *aa;
1599 
1600   PetscFunctionBegin;
1601   /* Make a copy of the IS and  sort it */
1602   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1603   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1604 
1605   /* allocate memory for rows,sizes */
1606   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1607   sizes = rows + is_n;
1608 
1609   /* copy IS values to rows, and sort them */
1610   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1611   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1612   if (baij->keepzeroedrows) {
1613     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1614     bs_max = is_n;
1615   } else {
1616     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1617   }
1618   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1619 
1620   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1621     row   = rows[j];
1622     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1623     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1624     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1625     if (sizes[i] == bs && !baij->keepzeroedrows) {
1626       if (diag) {
1627         if (baij->ilen[row/bs] > 0) {
1628           baij->ilen[row/bs]       = 1;
1629           baij->j[baij->i[row/bs]] = row/bs;
1630           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1631         }
1632         /* Now insert all the diagonal values for this bs */
1633         for (k=0; k<bs; k++) {
1634           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1635         }
1636       } else { /* (!diag) */
1637         baij->ilen[row/bs] = 0;
1638       } /* end (!diag) */
1639     } else { /* (sizes[i] != bs) */
1640 #if defined (PETSC_USE_DEBUG)
1641       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1642 #endif
1643       for (k=0; k<count; k++) {
1644         aa[0] =  zero;
1645         aa    += bs;
1646       }
1647       if (diag) {
1648         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1649       }
1650     }
1651   }
1652 
1653   ierr = PetscFree(rows);CHKERRQ(ierr);
1654   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1660 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1661 {
1662   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1663   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1664   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1665   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1666   int         ridx,cidx,bs2=a->bs2,ierr;
1667   PetscTruth  roworiented=a->roworiented;
1668   MatScalar   *ap,value,*aa=a->a,*bap;
1669 
1670   PetscFunctionBegin;
1671   for (k=0; k<m; k++) { /* loop over added rows */
1672     row  = im[k]; brow = row/bs;
1673     if (row < 0) continue;
1674 #if defined(PETSC_USE_BOPT_g)
1675     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1676 #endif
1677     rp   = aj + ai[brow];
1678     ap   = aa + bs2*ai[brow];
1679     rmax = imax[brow];
1680     nrow = ailen[brow];
1681     low  = 0;
1682     for (l=0; l<n; l++) { /* loop over added columns */
1683       if (in[l] < 0) continue;
1684 #if defined(PETSC_USE_BOPT_g)
1685       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1686 #endif
1687       col = in[l]; bcol = col/bs;
1688       ridx = row % bs; cidx = col % bs;
1689       if (roworiented) {
1690         value = v[l + k*n];
1691       } else {
1692         value = v[k + l*m];
1693       }
1694       if (!sorted) low = 0; high = nrow;
1695       while (high-low > 7) {
1696         t = (low+high)/2;
1697         if (rp[t] > bcol) high = t;
1698         else              low  = t;
1699       }
1700       for (i=low; i<high; i++) {
1701         if (rp[i] > bcol) break;
1702         if (rp[i] == bcol) {
1703           bap  = ap +  bs2*i + bs*cidx + ridx;
1704           if (is == ADD_VALUES) *bap += value;
1705           else                  *bap  = value;
1706           goto noinsert1;
1707         }
1708       }
1709       if (nonew == 1) goto noinsert1;
1710       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1711       if (nrow >= rmax) {
1712         /* there is no extra room in row, therefore enlarge */
1713         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1714         MatScalar *new_a;
1715 
1716         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1717 
1718         /* Malloc new storage space */
1719         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1720 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1721         new_j   = (int*)(new_a + bs2*new_nz);
1722         new_i   = new_j + new_nz;
1723 
1724         /* copy over old data into new slots */
1725         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1726         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1727         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1728         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1729         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1730         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1731         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1732         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1733         /* free up old matrix storage */
1734         ierr = PetscFree(a->a);CHKERRQ(ierr);
1735         if (!a->singlemalloc) {
1736           ierr = PetscFree(a->i);CHKERRQ(ierr);
1737           ierr = PetscFree(a->j);CHKERRQ(ierr);
1738         }
1739         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1740         a->singlemalloc = PETSC_TRUE;
1741 
1742         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1743         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1744         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1745         a->maxnz += bs2*CHUNKSIZE;
1746         a->reallocs++;
1747         a->nz++;
1748       }
1749       N = nrow++ - 1;
1750       /* shift up all the later entries in this row */
1751       for (ii=N; ii>=i; ii--) {
1752         rp[ii+1] = rp[ii];
1753         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1754       }
1755       if (N>=i) {
1756         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1757       }
1758       rp[i]                      = bcol;
1759       ap[bs2*i + bs*cidx + ridx] = value;
1760       noinsert1:;
1761       low = i;
1762     }
1763     ailen[brow] = nrow;
1764   }
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1771 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1772 {
1773   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1774   Mat         outA;
1775   PetscErrorCode ierr;
1776   PetscTruth  row_identity,col_identity;
1777 
1778   PetscFunctionBegin;
1779   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1780   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1781   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1782   if (!row_identity || !col_identity) {
1783     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1784   }
1785 
1786   outA          = inA;
1787   inA->factor   = FACTOR_LU;
1788 
1789   if (!a->diag) {
1790     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1791   }
1792 
1793   a->row        = row;
1794   a->col        = col;
1795   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1796   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1797 
1798   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1799   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1800   PetscLogObjectParent(inA,a->icol);
1801 
1802   /*
1803       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1804       for ILU(0) factorization with natural ordering
1805   */
1806   if (a->bs < 8) {
1807     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1808   } else {
1809     if (!a->solve_work) {
1810       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1811       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1812     }
1813   }
1814 
1815   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1816 
1817   PetscFunctionReturn(0);
1818 }
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1821 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1822 {
1823   static PetscTruth called = PETSC_FALSE;
1824   MPI_Comm          comm = A->comm;
1825   PetscErrorCode ierr;
1826 
1827   PetscFunctionBegin;
1828   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1829   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1830   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 EXTERN_C_BEGIN
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1837 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1838 {
1839   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1840   int         i,nz,nbs;
1841 
1842   PetscFunctionBegin;
1843   nz  = baij->maxnz/baij->bs2;
1844   nbs = baij->nbs;
1845   for (i=0; i<nz; i++) {
1846     baij->j[i] = indices[i];
1847   }
1848   baij->nz = nz;
1849   for (i=0; i<nbs; i++) {
1850     baij->ilen[i] = baij->imax[i];
1851   }
1852 
1853   PetscFunctionReturn(0);
1854 }
1855 EXTERN_C_END
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1859 /*@
1860     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1861        in the matrix.
1862 
1863   Input Parameters:
1864 +  mat - the SeqBAIJ matrix
1865 -  indices - the column indices
1866 
1867   Level: advanced
1868 
1869   Notes:
1870     This can be called if you have precomputed the nonzero structure of the
1871   matrix and want to provide it to the matrix object to improve the performance
1872   of the MatSetValues() operation.
1873 
1874     You MUST have set the correct numbers of nonzeros per row in the call to
1875   MatCreateSeqBAIJ().
1876 
1877     MUST be called before any calls to MatSetValues();
1878 
1879 @*/
1880 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1881 {
1882   PetscErrorCode ierr,(*f)(Mat,int *);
1883 
1884   PetscFunctionBegin;
1885   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1886   PetscValidPointer(indices,2);
1887   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1888   if (f) {
1889     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1890   } else {
1891     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1898 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1899 {
1900   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902   int          i,j,n,row,bs,*ai,*aj,mbs;
1903   PetscReal    atmp;
1904   PetscScalar  *x,zero = 0.0;
1905   MatScalar    *aa;
1906   int          ncols,brow,krow,kcol;
1907 
1908   PetscFunctionBegin;
1909   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1910   bs   = a->bs;
1911   aa   = a->a;
1912   ai   = a->i;
1913   aj   = a->j;
1914   mbs = a->mbs;
1915 
1916   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1917   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1918   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1919   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1920   for (i=0; i<mbs; i++) {
1921     ncols = ai[1] - ai[0]; ai++;
1922     brow  = bs*i;
1923     for (j=0; j<ncols; j++){
1924       /* bcol = bs*(*aj); */
1925       for (kcol=0; kcol<bs; kcol++){
1926         for (krow=0; krow<bs; krow++){
1927           atmp = PetscAbsScalar(*aa); aa++;
1928           row = brow + krow;    /* row index */
1929           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1930           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1931         }
1932       }
1933       aj++;
1934     }
1935   }
1936   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1942 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1943 {
1944   PetscErrorCode ierr;
1945 
1946   PetscFunctionBegin;
1947   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1953 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1954 {
1955   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1956   PetscFunctionBegin;
1957   *array = a->a;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1963 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1964 {
1965   PetscFunctionBegin;
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #include "petscblaslapack.h"
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1972 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1973 {
1974   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1975   PetscErrorCode ierr;
1976   int          one=1,i,bs=y->bs,j,bs2;
1977 
1978   PetscFunctionBegin;
1979   if (str == SAME_NONZERO_PATTERN) {
1980     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1981   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1982     if (y->xtoy && y->XtoY != X) {
1983       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1984       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1985     }
1986     if (!y->xtoy) { /* get xtoy */
1987       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1988       y->XtoY = X;
1989     }
1990     bs2 = bs*bs;
1991     for (i=0; i<x->nz; i++) {
1992       j = 0;
1993       while (j < bs2){
1994         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1995         j++;
1996       }
1997     }
1998     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1999   } else {
2000     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2001   }
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 /* -------------------------------------------------------------------*/
2006 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2007        MatGetRow_SeqBAIJ,
2008        MatRestoreRow_SeqBAIJ,
2009        MatMult_SeqBAIJ_N,
2010 /* 4*/ MatMultAdd_SeqBAIJ_N,
2011        MatMultTranspose_SeqBAIJ,
2012        MatMultTransposeAdd_SeqBAIJ,
2013        MatSolve_SeqBAIJ_N,
2014        0,
2015        0,
2016 /*10*/ 0,
2017        MatLUFactor_SeqBAIJ,
2018        0,
2019        0,
2020        MatTranspose_SeqBAIJ,
2021 /*15*/ MatGetInfo_SeqBAIJ,
2022        MatEqual_SeqBAIJ,
2023        MatGetDiagonal_SeqBAIJ,
2024        MatDiagonalScale_SeqBAIJ,
2025        MatNorm_SeqBAIJ,
2026 /*20*/ 0,
2027        MatAssemblyEnd_SeqBAIJ,
2028        0,
2029        MatSetOption_SeqBAIJ,
2030        MatZeroEntries_SeqBAIJ,
2031 /*25*/ MatZeroRows_SeqBAIJ,
2032        MatLUFactorSymbolic_SeqBAIJ,
2033        MatLUFactorNumeric_SeqBAIJ_N,
2034        0,
2035        0,
2036 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2037        MatILUFactorSymbolic_SeqBAIJ,
2038        0,
2039        MatGetArray_SeqBAIJ,
2040        MatRestoreArray_SeqBAIJ,
2041 /*35*/ MatDuplicate_SeqBAIJ,
2042        0,
2043        0,
2044        MatILUFactor_SeqBAIJ,
2045        0,
2046 /*40*/ MatAXPY_SeqBAIJ,
2047        MatGetSubMatrices_SeqBAIJ,
2048        MatIncreaseOverlap_SeqBAIJ,
2049        MatGetValues_SeqBAIJ,
2050        0,
2051 /*45*/ MatPrintHelp_SeqBAIJ,
2052        MatScale_SeqBAIJ,
2053        0,
2054        0,
2055        0,
2056 /*50*/ MatGetBlockSize_SeqBAIJ,
2057        MatGetRowIJ_SeqBAIJ,
2058        MatRestoreRowIJ_SeqBAIJ,
2059        0,
2060        0,
2061 /*55*/ 0,
2062        0,
2063        0,
2064        0,
2065        MatSetValuesBlocked_SeqBAIJ,
2066 /*60*/ MatGetSubMatrix_SeqBAIJ,
2067        MatDestroy_SeqBAIJ,
2068        MatView_SeqBAIJ,
2069        MatGetPetscMaps_Petsc,
2070        0,
2071 /*65*/ 0,
2072        0,
2073        0,
2074        0,
2075        0,
2076 /*70*/ MatGetRowMax_SeqBAIJ,
2077        MatConvert_Basic,
2078        0,
2079        0,
2080        0,
2081 /*75*/ 0,
2082        0,
2083        0,
2084        0,
2085        0,
2086 /*80*/ 0,
2087        0,
2088        0,
2089        0,
2090 /*85*/ MatLoad_SeqBAIJ,
2091        0,
2092        0,
2093        0,
2094        0
2095 };
2096 
2097 EXTERN_C_BEGIN
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2100 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2101 {
2102   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2103   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   if (aij->nonew != 1) {
2108     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2109   }
2110 
2111   /* allocate space for values if not already there */
2112   if (!aij->saved_values) {
2113     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2114   }
2115 
2116   /* copy values over */
2117   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 EXTERN_C_END
2121 
2122 EXTERN_C_BEGIN
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2125 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2126 {
2127   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2128   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
2129 
2130   PetscFunctionBegin;
2131   if (aij->nonew != 1) {
2132     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2133   }
2134   if (!aij->saved_values) {
2135     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2136   }
2137 
2138   /* copy values over */
2139   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 EXTERN_C_END
2143 
2144 EXTERN_C_BEGIN
2145 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2146 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2147 EXTERN_C_END
2148 
2149 EXTERN_C_BEGIN
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2152 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
2153 {
2154   Mat_SeqBAIJ *b;
2155   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
2156   PetscTruth  flg;
2157 
2158   PetscFunctionBegin;
2159 
2160   B->preallocated = PETSC_TRUE;
2161   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2162   if (nnz && newbs != bs) {
2163     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2164   }
2165   bs   = newbs;
2166 
2167   mbs  = B->m/bs;
2168   nbs  = B->n/bs;
2169   bs2  = bs*bs;
2170 
2171   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2172     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2173   }
2174 
2175   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2176   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2177   if (nnz) {
2178     for (i=0; i<mbs; i++) {
2179       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2180       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2181     }
2182   }
2183 
2184   b       = (Mat_SeqBAIJ*)B->data;
2185   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2186   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2187   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2188   if (!flg) {
2189     switch (bs) {
2190     case 1:
2191       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2192       B->ops->mult            = MatMult_SeqBAIJ_1;
2193       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2194       break;
2195     case 2:
2196       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2197       B->ops->mult            = MatMult_SeqBAIJ_2;
2198       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2199       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2200       break;
2201     case 3:
2202       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2203       B->ops->mult            = MatMult_SeqBAIJ_3;
2204       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2205       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2206       break;
2207     case 4:
2208       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2209       B->ops->mult            = MatMult_SeqBAIJ_4;
2210       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2211       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2212       break;
2213     case 5:
2214       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2215       B->ops->mult            = MatMult_SeqBAIJ_5;
2216       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2217       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2218       break;
2219     case 6:
2220       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2221       B->ops->mult            = MatMult_SeqBAIJ_6;
2222       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2223       break;
2224     case 7:
2225       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2226       B->ops->mult            = MatMult_SeqBAIJ_7;
2227       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2228       break;
2229     default:
2230       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2231       B->ops->mult            = MatMult_SeqBAIJ_N;
2232       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2233       break;
2234     }
2235   }
2236   b->bs      = bs;
2237   b->mbs     = mbs;
2238   b->nbs     = nbs;
2239   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2240   if (!nnz) {
2241     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2242     else if (nz <= 0)        nz = 1;
2243     for (i=0; i<mbs; i++) b->imax[i] = nz;
2244     nz = nz*mbs;
2245   } else {
2246     nz = 0;
2247     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2248   }
2249 
2250   /* allocate the matrix space */
2251   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2252   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2253   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2254   b->j  = (int*)(b->a + nz*bs2);
2255   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2256   b->i  = b->j + nz;
2257   b->singlemalloc = PETSC_TRUE;
2258 
2259   b->i[0] = 0;
2260   for (i=1; i<mbs+1; i++) {
2261     b->i[i] = b->i[i-1] + b->imax[i-1];
2262   }
2263 
2264   /* b->ilen will count nonzeros in each block row so far. */
2265   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2266   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2267   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2268 
2269   b->bs               = bs;
2270   b->bs2              = bs2;
2271   b->mbs              = mbs;
2272   b->nz               = 0;
2273   b->maxnz            = nz*bs2;
2274   B->info.nz_unneeded = (PetscReal)b->maxnz;
2275   PetscFunctionReturn(0);
2276 }
2277 EXTERN_C_END
2278 
2279 /*MC
2280    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2281    block sparse compressed row format.
2282 
2283    Options Database Keys:
2284 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2285 
2286   Level: beginner
2287 
2288 .seealso: MatCreateSeqBAIJ
2289 M*/
2290 
2291 EXTERN_C_BEGIN
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatCreate_SeqBAIJ"
2294 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2295 {
2296   PetscErrorCode ierr;
2297   int         size;
2298   Mat_SeqBAIJ *b;
2299 
2300   PetscFunctionBegin;
2301   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2302   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2303 
2304   B->m = B->M = PetscMax(B->m,B->M);
2305   B->n = B->N = PetscMax(B->n,B->N);
2306   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2307   B->data = (void*)b;
2308   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2309   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2310   B->factor           = 0;
2311   B->lupivotthreshold = 1.0;
2312   B->mapping          = 0;
2313   b->row              = 0;
2314   b->col              = 0;
2315   b->icol             = 0;
2316   b->reallocs         = 0;
2317   b->saved_values     = 0;
2318 #if defined(PETSC_USE_MAT_SINGLE)
2319   b->setvalueslen     = 0;
2320   b->setvaluescopy    = PETSC_NULL;
2321 #endif
2322 
2323   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2324   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2325 
2326   b->sorted           = PETSC_FALSE;
2327   b->roworiented      = PETSC_TRUE;
2328   b->nonew            = 0;
2329   b->diag             = 0;
2330   b->solve_work       = 0;
2331   b->mult_work        = 0;
2332   B->spptr            = 0;
2333   B->info.nz_unneeded = (PetscReal)b->maxnz;
2334   b->keepzeroedrows   = PETSC_FALSE;
2335   b->xtoy              = 0;
2336   b->XtoY              = 0;
2337 
2338   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2339                                      "MatStoreValues_SeqBAIJ",
2340                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2341   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2342                                      "MatRetrieveValues_SeqBAIJ",
2343                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2345                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2346                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2348                                      "MatConvert_SeqBAIJ_SeqAIJ",
2349                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2351                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2352                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2354                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2355                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 EXTERN_C_END
2359 
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2362 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2363 {
2364   Mat         C;
2365   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2366   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
2367 
2368   PetscFunctionBegin;
2369   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2370 
2371   *B = 0;
2372   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2373   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2374   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2375   c    = (Mat_SeqBAIJ*)C->data;
2376 
2377   C->M   = A->M;
2378   C->N   = A->N;
2379   c->bs  = a->bs;
2380   c->bs2 = a->bs2;
2381   c->mbs = a->mbs;
2382   c->nbs = a->nbs;
2383 
2384   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
2385   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
2386   for (i=0; i<mbs; i++) {
2387     c->imax[i] = a->imax[i];
2388     c->ilen[i] = a->ilen[i];
2389   }
2390 
2391   /* allocate the matrix space */
2392   c->singlemalloc = PETSC_TRUE;
2393   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
2394   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2395   c->j = (int*)(c->a + nz*bs2);
2396   c->i = c->j + nz;
2397   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
2398   if (mbs > 0) {
2399     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
2400     if (cpvalues == MAT_COPY_VALUES) {
2401       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2402     } else {
2403       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2404     }
2405   }
2406 
2407   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2408   c->sorted      = a->sorted;
2409   c->roworiented = a->roworiented;
2410   c->nonew       = a->nonew;
2411 
2412   if (a->diag) {
2413     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
2414     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
2415     for (i=0; i<mbs; i++) {
2416       c->diag[i] = a->diag[i];
2417     }
2418   } else c->diag        = 0;
2419   c->nz                 = a->nz;
2420   c->maxnz              = a->maxnz;
2421   c->solve_work         = 0;
2422   c->mult_work          = 0;
2423   C->preallocated       = PETSC_TRUE;
2424   C->assembled          = PETSC_TRUE;
2425   *B = C;
2426   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatLoad_SeqBAIJ"
2432 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2433 {
2434   Mat_SeqBAIJ  *a;
2435   Mat          B;
2436   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2437   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2438   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2439   int          *masked,nmask,tmp,bs2,ishift;
2440   PetscScalar  *aa;
2441   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2442 
2443   PetscFunctionBegin;
2444   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2445   bs2  = bs*bs;
2446 
2447   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2448   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2449   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2450   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2451   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2452   M = header[1]; N = header[2]; nz = header[3];
2453 
2454   if (header[3] < 0) {
2455     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2456   }
2457 
2458   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2459 
2460   /*
2461      This code adds extra rows to make sure the number of rows is
2462     divisible by the blocksize
2463   */
2464   mbs        = M/bs;
2465   extra_rows = bs - M + bs*(mbs);
2466   if (extra_rows == bs) extra_rows = 0;
2467   else                  mbs++;
2468   if (extra_rows) {
2469     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2470   }
2471 
2472   /* read in row lengths */
2473   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2474   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2475   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2476 
2477   /* read in column indices */
2478   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
2479   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2480   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2481 
2482   /* loop over row lengths determining block row lengths */
2483   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
2484   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
2485   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
2486   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
2487   masked   = mask + mbs;
2488   rowcount = 0; nzcount = 0;
2489   for (i=0; i<mbs; i++) {
2490     nmask = 0;
2491     for (j=0; j<bs; j++) {
2492       kmax = rowlengths[rowcount];
2493       for (k=0; k<kmax; k++) {
2494         tmp = jj[nzcount++]/bs;
2495         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2496       }
2497       rowcount++;
2498     }
2499     browlengths[i] += nmask;
2500     /* zero out the mask elements we set */
2501     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2502   }
2503 
2504   /* create our matrix */
2505   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2506   ierr = MatSetType(B,type);CHKERRQ(ierr);
2507   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2508   a = (Mat_SeqBAIJ*)B->data;
2509 
2510   /* set matrix "i" values */
2511   a->i[0] = 0;
2512   for (i=1; i<= mbs; i++) {
2513     a->i[i]      = a->i[i-1] + browlengths[i-1];
2514     a->ilen[i-1] = browlengths[i-1];
2515   }
2516   a->nz         = 0;
2517   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2518 
2519   /* read in nonzero values */
2520   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2521   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2522   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2523 
2524   /* set "a" and "j" values into matrix */
2525   nzcount = 0; jcount = 0;
2526   for (i=0; i<mbs; i++) {
2527     nzcountb = nzcount;
2528     nmask    = 0;
2529     for (j=0; j<bs; j++) {
2530       kmax = rowlengths[i*bs+j];
2531       for (k=0; k<kmax; k++) {
2532         tmp = jj[nzcount++]/bs;
2533 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2534       }
2535     }
2536     /* sort the masked values */
2537     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2538 
2539     /* set "j" values into matrix */
2540     maskcount = 1;
2541     for (j=0; j<nmask; j++) {
2542       a->j[jcount++]  = masked[j];
2543       mask[masked[j]] = maskcount++;
2544     }
2545     /* set "a" values into matrix */
2546     ishift = bs2*a->i[i];
2547     for (j=0; j<bs; j++) {
2548       kmax = rowlengths[i*bs+j];
2549       for (k=0; k<kmax; k++) {
2550         tmp       = jj[nzcountb]/bs ;
2551         block     = mask[tmp] - 1;
2552         point     = jj[nzcountb] - bs*tmp;
2553         idx       = ishift + bs2*block + j + bs*point;
2554         a->a[idx] = (MatScalar)aa[nzcountb++];
2555       }
2556     }
2557     /* zero out the mask elements we set */
2558     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2559   }
2560   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2561 
2562   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2563   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2564   ierr = PetscFree(aa);CHKERRQ(ierr);
2565   ierr = PetscFree(jj);CHKERRQ(ierr);
2566   ierr = PetscFree(mask);CHKERRQ(ierr);
2567 
2568   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2569   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2570   ierr = MatView_Private(B);CHKERRQ(ierr);
2571 
2572   *A = B;
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 #undef __FUNCT__
2577 #define __FUNCT__ "MatCreateSeqBAIJ"
2578 /*@C
2579    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2580    compressed row) format.  For good matrix assembly performance the
2581    user should preallocate the matrix storage by setting the parameter nz
2582    (or the array nnz).  By setting these parameters accurately, performance
2583    during matrix assembly can be increased by more than a factor of 50.
2584 
2585    Collective on MPI_Comm
2586 
2587    Input Parameters:
2588 +  comm - MPI communicator, set to PETSC_COMM_SELF
2589 .  bs - size of block
2590 .  m - number of rows
2591 .  n - number of columns
2592 .  nz - number of nonzero blocks  per block row (same for all rows)
2593 -  nnz - array containing the number of nonzero blocks in the various block rows
2594          (possibly different for each block row) or PETSC_NULL
2595 
2596    Output Parameter:
2597 .  A - the matrix
2598 
2599    Options Database Keys:
2600 .   -mat_no_unroll - uses code that does not unroll the loops in the
2601                      block calculations (much slower)
2602 .    -mat_block_size - size of the blocks to use
2603 
2604    Level: intermediate
2605 
2606    Notes:
2607    A nonzero block is any block that as 1 or more nonzeros in it
2608 
2609    The block AIJ format is fully compatible with standard Fortran 77
2610    storage.  That is, the stored row and column indices can begin at
2611    either one (as in Fortran) or zero.  See the users' manual for details.
2612 
2613    Specify the preallocated storage with either nz or nnz (not both).
2614    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2615    allocation.  For additional details, see the users manual chapter on
2616    matrices.
2617 
2618 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2619 @*/
2620 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2621 {
2622   PetscErrorCode ierr;
2623 
2624   PetscFunctionBegin;
2625   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2626   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2627   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 #undef __FUNCT__
2632 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2633 /*@C
2634    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2635    per row in the matrix. For good matrix assembly performance the
2636    user should preallocate the matrix storage by setting the parameter nz
2637    (or the array nnz).  By setting these parameters accurately, performance
2638    during matrix assembly can be increased by more than a factor of 50.
2639 
2640    Collective on MPI_Comm
2641 
2642    Input Parameters:
2643 +  A - the matrix
2644 .  bs - size of block
2645 .  nz - number of block nonzeros per block row (same for all rows)
2646 -  nnz - array containing the number of block nonzeros in the various block rows
2647          (possibly different for each block row) or PETSC_NULL
2648 
2649    Options Database Keys:
2650 .   -mat_no_unroll - uses code that does not unroll the loops in the
2651                      block calculations (much slower)
2652 .    -mat_block_size - size of the blocks to use
2653 
2654    Level: intermediate
2655 
2656    Notes:
2657    The block AIJ format is fully compatible with standard Fortran 77
2658    storage.  That is, the stored row and column indices can begin at
2659    either one (as in Fortran) or zero.  See the users' manual for details.
2660 
2661    Specify the preallocated storage with either nz or nnz (not both).
2662    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2663    allocation.  For additional details, see the users manual chapter on
2664    matrices.
2665 
2666 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2667 @*/
2668 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2669 {
2670   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
2671 
2672   PetscFunctionBegin;
2673   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2674   if (f) {
2675     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2676   }
2677   PetscFunctionReturn(0);
2678 }
2679 
2680