xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b4ea070f8aad0bed66186c809ab8a23e930ef111)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
15 int MatInvertBlockDiagonal_SeqBAIJ(Mat A)
16 {
17   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
18   int         *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr;
19   PetscScalar *v = a->a,*odiag,*diag,*mdiag;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) PetscFunctionReturn(0);
23   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
24   diag_offset = a->diag;
25   if (!a->idiag) {
26     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
27   }
28   diag  = a->idiag;
29   mdiag = a->idiag+bs*bs*mbs;
30   /* factor and invert each block */
31   switch (a->bs){
32     case 2:
33       for (i=0; i<mbs; i++) {
34         odiag   = v + 4*diag_offset[i];
35         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
36 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
37 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
38 	diag    += 4;
39 	mdiag   += 4;
40       }
41       break;
42     case 3:
43       for (i=0; i<mbs; i++) {
44         odiag    = v + 9*diag_offset[i];
45         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
46         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
47         diag[8]  = odiag[8];
48         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
49         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
50         mdiag[8] = odiag[8];
51 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
52 	diag    += 9;
53 	mdiag   += 9;
54       }
55       break;
56     case 4:
57       for (i=0; i<mbs; i++) {
58         odiag  = v + 16*diag_offset[i];
59         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
61 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
62 	diag  += 16;
63 	mdiag += 16;
64       }
65       break;
66     case 5:
67       for (i=0; i<mbs; i++) {
68         odiag = v + 25*diag_offset[i];
69         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
71 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
72 	diag  += 25;
73 	mdiag += 25;
74       }
75       break;
76     default:
77       SETERRQ1(1,"not supported for block size %d",a->bs);
78   }
79   a->idiagvalid = PETSC_TRUE;
80   PetscFunctionReturn(0);
81 }
82 
83 #undef __FUNCT__
84 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
85 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
86 {
87   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
88   PetscScalar        *x,x1,x2,s1,s2;
89   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
90   int                ierr,m = a->mbs,i,i2,nz,idx;
91   const int          *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       PetscLogFlops(4*(a->nz));
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       PetscLogFlops(6*m);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       PetscLogFlops(4*(a->nz));
175     }
176   } else {
177     SETERRQ(1,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   int                ierr,m = a->mbs,i,i2,nz,idx;
192   const int          *diag,*ai = a->i,*aj = a->j,*vi;
193 
194   PetscFunctionBegin;
195   its = its*lits;
196   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
197   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
198   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
199   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
200   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
201 
202   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
203 
204   diag  = a->diag;
205   idiag = a->idiag;
206   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
207   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
208 
209   if (flag & SOR_ZERO_INITIAL_GUESS) {
210     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
211       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
212       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
213       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
214       i2     = 3;
215       idiag += 9;
216       for (i=1; i<m; i++) {
217 	v     = aa + 9*ai[i];
218 	vi    = aj + ai[i];
219 	nz    = diag[i] - ai[i];
220 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
221 	while (nz--) {
222 	  idx  = 3*(*vi++);
223 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
224 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
225 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
226 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
227 	  v   += 9;
228 	}
229 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
230 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
231 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
232         idiag   += 9;
233         i2      += 3;
234       }
235       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
236       PetscLogFlops(9*(a->nz));
237     }
238     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
239         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
240       i2    = 0;
241       mdiag = a->idiag+9*a->mbs;
242       for (i=0; i<m; i++) {
243         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
244         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
245         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
246         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
247         mdiag  += 9;
248         i2     += 3;
249       }
250       PetscLogFlops(15*m);
251     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
252       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
253     }
254     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
255       idiag   = a->idiag+9*a->mbs - 9;
256       i2      = 3*m - 3;
257       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
258       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
259       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
260       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
261       idiag -= 9;
262       i2    -= 3;
263       for (i=m-2; i>=0; i--) {
264 	v     = aa + 9*(diag[i]+1);
265 	vi    = aj + diag[i] + 1;
266 	nz    = ai[i+1] - diag[i] - 1;
267 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
268 	while (nz--) {
269 	  idx  = 3*(*vi++);
270 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
271 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
272 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
273 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
274 	  v   += 9;
275 	}
276 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
277 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
278 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
279         idiag   -= 9;
280         i2      -= 3;
281       }
282       PetscLogFlops(9*(a->nz));
283     }
284   } else {
285     SETERRQ(1,"Only supports point block SOR with zero initial guess");
286   }
287   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
288   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
294 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
295 {
296   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
297   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
298   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
299   int                ierr,m = a->mbs,i,i2,nz,idx;
300   const int          *diag,*ai = a->i,*aj = a->j,*vi;
301 
302   PetscFunctionBegin;
303   its = its*lits;
304   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
305   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
306   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
307   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
308   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
309 
310   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
311 
312   diag  = a->diag;
313   idiag = a->idiag;
314   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
315   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
316 
317   if (flag & SOR_ZERO_INITIAL_GUESS) {
318     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
319       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
320       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
321       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
322       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
323       i2     = 4;
324       idiag += 16;
325       for (i=1; i<m; i++) {
326 	v     = aa + 16*ai[i];
327 	vi    = aj + ai[i];
328 	nz    = diag[i] - ai[i];
329 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
330 	while (nz--) {
331 	  idx  = 4*(*vi++);
332 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
333 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
334 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
335 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
336 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
337 	  v   += 16;
338 	}
339 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
340 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
341 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
342 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
343         idiag   += 16;
344         i2      += 4;
345       }
346       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
347       PetscLogFlops(16*(a->nz));
348     }
349     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
350         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
351       i2    = 0;
352       mdiag = a->idiag+16*a->mbs;
353       for (i=0; i<m; i++) {
354         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
355         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
356         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
357         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
358         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
359         mdiag  += 16;
360         i2     += 4;
361       }
362       PetscLogFlops(28*m);
363     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
364       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
365     }
366     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
367       idiag   = a->idiag+16*a->mbs - 16;
368       i2      = 4*m - 4;
369       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
370       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
371       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
372       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
373       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
374       idiag -= 16;
375       i2    -= 4;
376       for (i=m-2; i>=0; i--) {
377 	v     = aa + 16*(diag[i]+1);
378 	vi    = aj + diag[i] + 1;
379 	nz    = ai[i+1] - diag[i] - 1;
380 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
381 	while (nz--) {
382 	  idx  = 4*(*vi++);
383 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
384 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
385 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
386 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
387 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
388 	  v   += 16;
389 	}
390 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
391 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
392 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
393 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
394         idiag   -= 16;
395         i2      -= 4;
396       }
397       PetscLogFlops(16*(a->nz));
398     }
399   } else {
400     SETERRQ(1,"Only supports point block SOR with zero initial guess");
401   }
402   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
403   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
409 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
410 {
411   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
412   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
413   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
414   int                ierr,m = a->mbs,i,i2,nz,idx;
415   const int          *diag,*ai = a->i,*aj = a->j,*vi;
416 
417   PetscFunctionBegin;
418   its = its*lits;
419   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
420   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
421   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
422   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
423   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
424 
425   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
426 
427   diag  = a->diag;
428   idiag = a->idiag;
429   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
430   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
431 
432   if (flag & SOR_ZERO_INITIAL_GUESS) {
433     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
434       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
435       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
436       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
437       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
438       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
439       i2     = 5;
440       idiag += 25;
441       for (i=1; i<m; i++) {
442 	v     = aa + 25*ai[i];
443 	vi    = aj + ai[i];
444 	nz    = diag[i] - ai[i];
445 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
446 	while (nz--) {
447 	  idx  = 5*(*vi++);
448 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
449 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
450 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
451 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
452 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
453 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
454 	  v   += 25;
455 	}
456 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
457 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
458 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
459 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
460 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
461         idiag   += 25;
462         i2      += 5;
463       }
464       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
465       PetscLogFlops(25*(a->nz));
466     }
467     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
468         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
469       i2    = 0;
470       mdiag = a->idiag+25*a->mbs;
471       for (i=0; i<m; i++) {
472         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
473         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
474         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
475         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
476         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
477         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
478         mdiag  += 25;
479         i2     += 5;
480       }
481       PetscLogFlops(45*m);
482     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
483       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
484     }
485     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
486       idiag   = a->idiag+25*a->mbs - 25;
487       i2      = 5*m - 5;
488       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
489       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
490       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
491       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
492       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
493       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
494       idiag -= 25;
495       i2    -= 5;
496       for (i=m-2; i>=0; i--) {
497 	v     = aa + 25*(diag[i]+1);
498 	vi    = aj + diag[i] + 1;
499 	nz    = ai[i+1] - diag[i] - 1;
500 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
501 	while (nz--) {
502 	  idx  = 5*(*vi++);
503 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
504 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
505 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
506 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
507 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
508 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
509 	  v   += 25;
510 	}
511 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
512 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
513 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
514 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
515 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
516         idiag   -= 25;
517         i2      -= 5;
518       }
519       PetscLogFlops(25*(a->nz));
520     }
521   } else {
522     SETERRQ(1,"Only supports point block SOR with zero initial guess");
523   }
524   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
525   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 /*
530     Special version for Fun3d sequential benchmark
531 */
532 #if defined(PETSC_HAVE_FORTRAN_CAPS)
533 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
534 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
535 #define matsetvaluesblocked4_ matsetvaluesblocked4
536 #endif
537 
538 EXTERN_C_BEGIN
539 #undef __FUNCT__
540 #define __FUNCT__ "matsetvaluesblocked4_"
541 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
542 {
543   Mat               A = *AA;
544   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
545   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
546   int               *ai=a->i,*ailen=a->ilen;
547   int               *aj=a->j,stepval;
548   const PetscScalar *value = v;
549   MatScalar         *ap,*aa = a->a,*bap;
550 
551   PetscFunctionBegin;
552   stepval = (n-1)*4;
553   for (k=0; k<m; k++) { /* loop over added rows */
554     row  = im[k];
555     rp   = aj + ai[row];
556     ap   = aa + 16*ai[row];
557     nrow = ailen[row];
558     low  = 0;
559     for (l=0; l<n; l++) { /* loop over added columns */
560       col = in[l];
561       value = v + k*(stepval+4)*4 + l*4;
562       low = 0; high = nrow;
563       while (high-low > 7) {
564         t = (low+high)/2;
565         if (rp[t] > col) high = t;
566         else             low  = t;
567       }
568       for (i=low; i<high; i++) {
569         if (rp[i] > col) break;
570         if (rp[i] == col) {
571           bap  = ap +  16*i;
572           for (ii=0; ii<4; ii++,value+=stepval) {
573             for (jj=ii; jj<16; jj+=4) {
574               bap[jj] += *value++;
575             }
576           }
577           goto noinsert2;
578         }
579       }
580       N = nrow++ - 1;
581       /* shift up all the later entries in this row */
582       for (ii=N; ii>=i; ii--) {
583         rp[ii+1] = rp[ii];
584         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
585       }
586       if (N >= i) {
587         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
588       }
589       rp[i] = col;
590       bap   = ap +  16*i;
591       for (ii=0; ii<4; ii++,value+=stepval) {
592         for (jj=ii; jj<16; jj+=4) {
593           bap[jj] = *value++;
594         }
595       }
596       noinsert2:;
597       low = i;
598     }
599     ailen[row] = nrow;
600   }
601 }
602 EXTERN_C_END
603 
604 #if defined(PETSC_HAVE_FORTRAN_CAPS)
605 #define matsetvalues4_ MATSETVALUES4
606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
607 #define matsetvalues4_ matsetvalues4
608 #endif
609 
610 EXTERN_C_BEGIN
611 #undef __FUNCT__
612 #define __FUNCT__ "MatSetValues4_"
613 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
614 {
615   Mat         A = *AA;
616   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
617   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
618   int         *ai=a->i,*ailen=a->ilen;
619   int         *aj=a->j,brow,bcol;
620   int         ridx,cidx;
621   MatScalar   *ap,value,*aa=a->a,*bap;
622 
623   PetscFunctionBegin;
624   for (k=0; k<m; k++) { /* loop over added rows */
625     row  = im[k]; brow = row/4;
626     rp   = aj + ai[brow];
627     ap   = aa + 16*ai[brow];
628     nrow = ailen[brow];
629     low  = 0;
630     for (l=0; l<n; l++) { /* loop over added columns */
631       col = in[l]; bcol = col/4;
632       ridx = row % 4; cidx = col % 4;
633       value = v[l + k*n];
634       low = 0; high = nrow;
635       while (high-low > 7) {
636         t = (low+high)/2;
637         if (rp[t] > bcol) high = t;
638         else              low  = t;
639       }
640       for (i=low; i<high; i++) {
641         if (rp[i] > bcol) break;
642         if (rp[i] == bcol) {
643           bap  = ap +  16*i + 4*cidx + ridx;
644           *bap += value;
645           goto noinsert1;
646         }
647       }
648       N = nrow++ - 1;
649       /* shift up all the later entries in this row */
650       for (ii=N; ii>=i; ii--) {
651         rp[ii+1] = rp[ii];
652         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
653       }
654       if (N>=i) {
655         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
656       }
657       rp[i]                    = bcol;
658       ap[16*i + 4*cidx + ridx] = value;
659       noinsert1:;
660       low = i;
661     }
662     ailen[brow] = nrow;
663   }
664 }
665 EXTERN_C_END
666 
667 /*  UGLY, ugly, ugly
668    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
669    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
670    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
671    converts the entries into single precision and then calls ..._MatScalar() to put them
672    into the single precision data structures.
673 */
674 #if defined(PETSC_USE_MAT_SINGLE)
675 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
676 #else
677 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
678 #endif
679 
680 #define CHUNKSIZE  10
681 
682 /*
683      Checks for missing diagonals
684 */
685 #undef __FUNCT__
686 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
687 int MatMissingDiagonal_SeqBAIJ(Mat A)
688 {
689   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
690   int         *diag,*jj = a->j,i,ierr;
691 
692   PetscFunctionBegin;
693   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
694   diag = a->diag;
695   for (i=0; i<a->mbs; i++) {
696     if (jj[diag[i]] != i) {
697       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
698     }
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
705 int MatMarkDiagonal_SeqBAIJ(Mat A)
706 {
707   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
708   int         i,j,*diag,m = a->mbs,ierr;
709 
710   PetscFunctionBegin;
711   if (a->diag) PetscFunctionReturn(0);
712 
713   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
714   PetscLogObjectMemory(A,(m+1)*sizeof(int));
715   for (i=0; i<m; i++) {
716     diag[i] = a->i[i+1];
717     for (j=a->i[i]; j<a->i[i+1]; j++) {
718       if (a->j[j] == i) {
719         diag[i] = j;
720         break;
721       }
722     }
723   }
724   a->diag = diag;
725   PetscFunctionReturn(0);
726 }
727 
728 
729 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
733 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
734 {
735   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
736   int         ierr,n = a->mbs,i;
737 
738   PetscFunctionBegin;
739   *nn = n;
740   if (!ia) PetscFunctionReturn(0);
741   if (symmetric) {
742     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
743   } else if (oshift == 1) {
744     /* temporarily add 1 to i and j indices */
745     int nz = a->i[n];
746     for (i=0; i<nz; i++) a->j[i]++;
747     for (i=0; i<n+1; i++) a->i[i]++;
748     *ia = a->i; *ja = a->j;
749   } else {
750     *ia = a->i; *ja = a->j;
751   }
752 
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
758 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
759 {
760   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
761   int         i,n = a->mbs,ierr;
762 
763   PetscFunctionBegin;
764   if (!ia) PetscFunctionReturn(0);
765   if (symmetric) {
766     ierr = PetscFree(*ia);CHKERRQ(ierr);
767     ierr = PetscFree(*ja);CHKERRQ(ierr);
768   } else if (oshift == 1) {
769     int nz = a->i[n]-1;
770     for (i=0; i<nz; i++) a->j[i]--;
771     for (i=0; i<n+1; i++) a->i[i]--;
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
778 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
779 {
780   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
781 
782   PetscFunctionBegin;
783   *bs = baij->bs;
784   PetscFunctionReturn(0);
785 }
786 
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatDestroy_SeqBAIJ"
790 int MatDestroy_SeqBAIJ(Mat A)
791 {
792   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
793   int         ierr;
794 
795   PetscFunctionBegin;
796 #if defined(PETSC_USE_LOG)
797   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
798 #endif
799   ierr = PetscFree(a->a);CHKERRQ(ierr);
800   if (!a->singlemalloc) {
801     ierr = PetscFree(a->i);CHKERRQ(ierr);
802     ierr = PetscFree(a->j);CHKERRQ(ierr);
803   }
804   if (a->row) {
805     ierr = ISDestroy(a->row);CHKERRQ(ierr);
806   }
807   if (a->col) {
808     ierr = ISDestroy(a->col);CHKERRQ(ierr);
809   }
810   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
811   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
812   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
813   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
814   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
815   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
816   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
817   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
818 #if defined(PETSC_USE_MAT_SINGLE)
819   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
820 #endif
821   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
822 
823   ierr = PetscFree(a);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatSetOption_SeqBAIJ"
829 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
830 {
831   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
832 
833   PetscFunctionBegin;
834   switch (op) {
835   case MAT_ROW_ORIENTED:
836     a->roworiented    = PETSC_TRUE;
837     break;
838   case MAT_COLUMN_ORIENTED:
839     a->roworiented    = PETSC_FALSE;
840     break;
841   case MAT_COLUMNS_SORTED:
842     a->sorted         = PETSC_TRUE;
843     break;
844   case MAT_COLUMNS_UNSORTED:
845     a->sorted         = PETSC_FALSE;
846     break;
847   case MAT_KEEP_ZEROED_ROWS:
848     a->keepzeroedrows = PETSC_TRUE;
849     break;
850   case MAT_NO_NEW_NONZERO_LOCATIONS:
851     a->nonew          = 1;
852     break;
853   case MAT_NEW_NONZERO_LOCATION_ERR:
854     a->nonew          = -1;
855     break;
856   case MAT_NEW_NONZERO_ALLOCATION_ERR:
857     a->nonew          = -2;
858     break;
859   case MAT_YES_NEW_NONZERO_LOCATIONS:
860     a->nonew          = 0;
861     break;
862   case MAT_ROWS_SORTED:
863   case MAT_ROWS_UNSORTED:
864   case MAT_YES_NEW_DIAGONALS:
865   case MAT_IGNORE_OFF_PROC_ENTRIES:
866   case MAT_USE_HASH_TABLE:
867     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
868     break;
869   case MAT_NO_NEW_DIAGONALS:
870     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
871   case MAT_SYMMETRIC:
872   case MAT_STRUCTURALLY_SYMMETRIC:
873   case MAT_NOT_SYMMETRIC:
874   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
875   case MAT_HERMITIAN:
876   case MAT_NOT_HERMITIAN:
877   case MAT_SYMMETRY_ETERNAL:
878   case MAT_NOT_SYMMETRY_ETERNAL:
879     break;
880   default:
881     SETERRQ(PETSC_ERR_SUP,"unknown option");
882   }
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatGetRow_SeqBAIJ"
888 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
889 {
890   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
891   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
892   MatScalar    *aa,*aa_i;
893   PetscScalar  *v_i;
894 
895   PetscFunctionBegin;
896   bs  = a->bs;
897   ai  = a->i;
898   aj  = a->j;
899   aa  = a->a;
900   bs2 = a->bs2;
901 
902   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
903 
904   bn  = row/bs;   /* Block number */
905   bp  = row % bs; /* Block Position */
906   M   = ai[bn+1] - ai[bn];
907   *nz = bs*M;
908 
909   if (v) {
910     *v = 0;
911     if (*nz) {
912       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
913       for (i=0; i<M; i++) { /* for each block in the block row */
914         v_i  = *v + i*bs;
915         aa_i = aa + bs2*(ai[bn] + i);
916         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
917       }
918     }
919   }
920 
921   if (idx) {
922     *idx = 0;
923     if (*nz) {
924       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
925       for (i=0; i<M; i++) { /* for each block in the block row */
926         idx_i = *idx + i*bs;
927         itmp  = bs*aj[ai[bn] + i];
928         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
929       }
930     }
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
937 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
938 {
939   int ierr;
940 
941   PetscFunctionBegin;
942   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
943   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatTranspose_SeqBAIJ"
949 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
950 {
951   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
952   Mat         C;
953   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
954   int         *rows,*cols,bs2=a->bs2;
955   PetscScalar *array;
956 
957   PetscFunctionBegin;
958   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
959   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
960   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
961 
962 #if defined(PETSC_USE_MAT_SINGLE)
963   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
964   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
965 #else
966   array = a->a;
967 #endif
968 
969   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
970   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
971   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
972   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
973   ierr = PetscFree(col);CHKERRQ(ierr);
974   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
975   cols = rows + bs;
976   for (i=0; i<mbs; i++) {
977     cols[0] = i*bs;
978     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
979     len = ai[i+1] - ai[i];
980     for (j=0; j<len; j++) {
981       rows[0] = (*aj++)*bs;
982       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
983       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
984       array += bs2;
985     }
986   }
987   ierr = PetscFree(rows);CHKERRQ(ierr);
988 #if defined(PETSC_USE_MAT_SINGLE)
989   ierr = PetscFree(array);
990 #endif
991 
992   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
993   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
994 
995   if (B) {
996     *B = C;
997   } else {
998     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1005 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1006 {
1007   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1008   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1009   PetscScalar *aa;
1010   FILE        *file;
1011 
1012   PetscFunctionBegin;
1013   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1014   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
1015   col_lens[0] = MAT_FILE_COOKIE;
1016 
1017   col_lens[1] = A->m;
1018   col_lens[2] = A->n;
1019   col_lens[3] = a->nz*bs2;
1020 
1021   /* store lengths of each row and write (including header) to file */
1022   count = 0;
1023   for (i=0; i<a->mbs; i++) {
1024     for (j=0; j<bs; j++) {
1025       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1026     }
1027   }
1028   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
1029   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1030 
1031   /* store column indices (zero start index) */
1032   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
1033   count = 0;
1034   for (i=0; i<a->mbs; i++) {
1035     for (j=0; j<bs; j++) {
1036       for (k=a->i[i]; k<a->i[i+1]; k++) {
1037         for (l=0; l<bs; l++) {
1038           jj[count++] = bs*a->j[k] + l;
1039         }
1040       }
1041     }
1042   }
1043   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
1044   ierr = PetscFree(jj);CHKERRQ(ierr);
1045 
1046   /* store nonzero values */
1047   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1048   count = 0;
1049   for (i=0; i<a->mbs; i++) {
1050     for (j=0; j<bs; j++) {
1051       for (k=a->i[i]; k<a->i[i+1]; k++) {
1052         for (l=0; l<bs; l++) {
1053           aa[count++] = a->a[bs2*k + l*bs + j];
1054         }
1055       }
1056     }
1057   }
1058   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
1059   ierr = PetscFree(aa);CHKERRQ(ierr);
1060 
1061   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1062   if (file) {
1063     fprintf(file,"-matload_block_size %d\n",a->bs);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1070 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1071 {
1072   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1073   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
1074   PetscViewerFormat format;
1075 
1076   PetscFunctionBegin;
1077   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1078   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1079     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1080   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1081     Mat aij;
1082     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1083     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1084     ierr = MatDestroy(aij);CHKERRQ(ierr);
1085   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086      PetscFunctionReturn(0);
1087   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1088     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1089     for (i=0; i<a->mbs; i++) {
1090       for (j=0; j<bs; j++) {
1091         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1092         for (k=a->i[i]; k<a->i[i+1]; k++) {
1093           for (l=0; l<bs; l++) {
1094 #if defined(PETSC_USE_COMPLEX)
1095             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1096               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1097                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1098             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1099               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1100                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1101             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1102               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1103             }
1104 #else
1105             if (a->a[bs2*k + l*bs + j] != 0.0) {
1106               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1107             }
1108 #endif
1109           }
1110         }
1111         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1112       }
1113     }
1114     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1115   } else {
1116     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1117     for (i=0; i<a->mbs; i++) {
1118       for (j=0; j<bs; j++) {
1119         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1120         for (k=a->i[i]; k<a->i[i+1]; k++) {
1121           for (l=0; l<bs; l++) {
1122 #if defined(PETSC_USE_COMPLEX)
1123             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1124               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1125                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1126             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1127               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1128                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1129             } else {
1130               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1131             }
1132 #else
1133             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1134 #endif
1135           }
1136         }
1137         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1138       }
1139     }
1140     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1141   }
1142   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1148 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1149 {
1150   Mat          A = (Mat) Aa;
1151   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
1152   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1153   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1154   MatScalar    *aa;
1155   PetscViewer  viewer;
1156 
1157   PetscFunctionBegin;
1158 
1159   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1160   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1161 
1162   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1163 
1164   /* loop over matrix elements drawing boxes */
1165   color = PETSC_DRAW_BLUE;
1166   for (i=0,row=0; i<mbs; i++,row+=bs) {
1167     for (j=a->i[i]; j<a->i[i+1]; j++) {
1168       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1169       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1170       aa = a->a + j*bs2;
1171       for (k=0; k<bs; k++) {
1172         for (l=0; l<bs; l++) {
1173           if (PetscRealPart(*aa++) >=  0.) continue;
1174           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1175         }
1176       }
1177     }
1178   }
1179   color = PETSC_DRAW_CYAN;
1180   for (i=0,row=0; i<mbs; i++,row+=bs) {
1181     for (j=a->i[i]; j<a->i[i+1]; j++) {
1182       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1183       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1184       aa = a->a + j*bs2;
1185       for (k=0; k<bs; k++) {
1186         for (l=0; l<bs; l++) {
1187           if (PetscRealPart(*aa++) != 0.) continue;
1188           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1189         }
1190       }
1191     }
1192   }
1193 
1194   color = PETSC_DRAW_RED;
1195   for (i=0,row=0; i<mbs; i++,row+=bs) {
1196     for (j=a->i[i]; j<a->i[i+1]; j++) {
1197       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1198       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1199       aa = a->a + j*bs2;
1200       for (k=0; k<bs; k++) {
1201         for (l=0; l<bs; l++) {
1202           if (PetscRealPart(*aa++) <= 0.) continue;
1203           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1204         }
1205       }
1206     }
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1213 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1214 {
1215   int          ierr;
1216   PetscReal    xl,yl,xr,yr,w,h;
1217   PetscDraw    draw;
1218   PetscTruth   isnull;
1219 
1220   PetscFunctionBegin;
1221 
1222   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1223   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1224 
1225   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1226   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1227   xr += w;    yr += h;  xl = -w;     yl = -h;
1228   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1229   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1230   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatView_SeqBAIJ"
1236 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1237 {
1238   int        ierr;
1239   PetscTruth isascii,isbinary,isdraw;
1240 
1241   PetscFunctionBegin;
1242   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1243   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1244   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1245   if (isascii){
1246     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1247   } else if (isbinary) {
1248     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1249   } else if (isdraw) {
1250     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1251   } else {
1252     Mat B;
1253     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1254     ierr = MatView(B,viewer);CHKERRQ(ierr);
1255     ierr = MatDestroy(B);CHKERRQ(ierr);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1263 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
1264 {
1265   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1266   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1267   int        *ai = a->i,*ailen = a->ilen;
1268   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1269   MatScalar  *ap,*aa = a->a,zero = 0.0;
1270 
1271   PetscFunctionBegin;
1272   for (k=0; k<m; k++) { /* loop over rows */
1273     row  = im[k]; brow = row/bs;
1274     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1275     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1276     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1277     nrow = ailen[brow];
1278     for (l=0; l<n; l++) { /* loop over columns */
1279       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1280       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1281       col  = in[l] ;
1282       bcol = col/bs;
1283       cidx = col%bs;
1284       ridx = row%bs;
1285       high = nrow;
1286       low  = 0; /* assume unsorted */
1287       while (high-low > 5) {
1288         t = (low+high)/2;
1289         if (rp[t] > bcol) high = t;
1290         else             low  = t;
1291       }
1292       for (i=low; i<high; i++) {
1293         if (rp[i] > bcol) break;
1294         if (rp[i] == bcol) {
1295           *v++ = ap[bs2*i+bs*cidx+ridx];
1296           goto finished;
1297         }
1298       }
1299       *v++ = zero;
1300       finished:;
1301     }
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #if defined(PETSC_USE_MAT_SINGLE)
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1309 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
1310 {
1311   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1312   int         ierr,i,N = m*n*b->bs2;
1313   MatScalar   *vsingle;
1314 
1315   PetscFunctionBegin;
1316   if (N > b->setvalueslen) {
1317     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1318     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1319     b->setvalueslen  = N;
1320   }
1321   vsingle = b->setvaluescopy;
1322   for (i=0; i<N; i++) {
1323     vsingle[i] = v[i];
1324   }
1325   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 #endif
1329 
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1333 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
1334 {
1335   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1336   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1337   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1338   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
1339   PetscTruth        roworiented=a->roworiented;
1340   const MatScalar   *value = v;
1341   MatScalar         *ap,*aa = a->a,*bap;
1342 
1343   PetscFunctionBegin;
1344   if (roworiented) {
1345     stepval = (n-1)*bs;
1346   } else {
1347     stepval = (m-1)*bs;
1348   }
1349   for (k=0; k<m; k++) { /* loop over added rows */
1350     row  = im[k];
1351     if (row < 0) continue;
1352 #if defined(PETSC_USE_BOPT_g)
1353     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1354 #endif
1355     rp   = aj + ai[row];
1356     ap   = aa + bs2*ai[row];
1357     rmax = imax[row];
1358     nrow = ailen[row];
1359     low  = 0;
1360     for (l=0; l<n; l++) { /* loop over added columns */
1361       if (in[l] < 0) continue;
1362 #if defined(PETSC_USE_BOPT_g)
1363       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1364 #endif
1365       col = in[l];
1366       if (roworiented) {
1367         value = v + k*(stepval+bs)*bs + l*bs;
1368       } else {
1369         value = v + l*(stepval+bs)*bs + k*bs;
1370       }
1371       if (!sorted) low = 0; high = nrow;
1372       while (high-low > 7) {
1373         t = (low+high)/2;
1374         if (rp[t] > col) high = t;
1375         else             low  = t;
1376       }
1377       for (i=low; i<high; i++) {
1378         if (rp[i] > col) break;
1379         if (rp[i] == col) {
1380           bap  = ap +  bs2*i;
1381           if (roworiented) {
1382             if (is == ADD_VALUES) {
1383               for (ii=0; ii<bs; ii++,value+=stepval) {
1384                 for (jj=ii; jj<bs2; jj+=bs) {
1385                   bap[jj] += *value++;
1386                 }
1387               }
1388             } else {
1389               for (ii=0; ii<bs; ii++,value+=stepval) {
1390                 for (jj=ii; jj<bs2; jj+=bs) {
1391                   bap[jj] = *value++;
1392                 }
1393               }
1394             }
1395           } else {
1396             if (is == ADD_VALUES) {
1397               for (ii=0; ii<bs; ii++,value+=stepval) {
1398                 for (jj=0; jj<bs; jj++) {
1399                   *bap++ += *value++;
1400                 }
1401               }
1402             } else {
1403               for (ii=0; ii<bs; ii++,value+=stepval) {
1404                 for (jj=0; jj<bs; jj++) {
1405                   *bap++  = *value++;
1406                 }
1407               }
1408             }
1409           }
1410           goto noinsert2;
1411         }
1412       }
1413       if (nonew == 1) goto noinsert2;
1414       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1415       if (nrow >= rmax) {
1416         /* there is no extra room in row, therefore enlarge */
1417         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1418         MatScalar *new_a;
1419 
1420         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1421 
1422         /* malloc new storage space */
1423         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1424 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1425         new_j   = (int*)(new_a + bs2*new_nz);
1426         new_i   = new_j + new_nz;
1427 
1428         /* copy over old data into new slots */
1429         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1430         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1431         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
1432         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1433         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
1434         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1435         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1436         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1437         /* free up old matrix storage */
1438         ierr = PetscFree(a->a);CHKERRQ(ierr);
1439         if (!a->singlemalloc) {
1440           ierr = PetscFree(a->i);CHKERRQ(ierr);
1441           ierr = PetscFree(a->j);CHKERRQ(ierr);
1442         }
1443         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1444         a->singlemalloc = PETSC_TRUE;
1445 
1446         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1447         rmax = imax[row] = imax[row] + CHUNKSIZE;
1448         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1449         a->maxnz += bs2*CHUNKSIZE;
1450         a->reallocs++;
1451         a->nz++;
1452       }
1453       N = nrow++ - 1;
1454       /* shift up all the later entries in this row */
1455       for (ii=N; ii>=i; ii--) {
1456         rp[ii+1] = rp[ii];
1457         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1458       }
1459       if (N >= i) {
1460         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1461       }
1462       rp[i] = col;
1463       bap   = ap +  bs2*i;
1464       if (roworiented) {
1465         for (ii=0; ii<bs; ii++,value+=stepval) {
1466           for (jj=ii; jj<bs2; jj+=bs) {
1467             bap[jj] = *value++;
1468           }
1469         }
1470       } else {
1471         for (ii=0; ii<bs; ii++,value+=stepval) {
1472           for (jj=0; jj<bs; jj++) {
1473             *bap++  = *value++;
1474           }
1475         }
1476       }
1477       noinsert2:;
1478       low = i;
1479     }
1480     ailen[row] = nrow;
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1487 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1488 {
1489   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1490   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1491   int        m = A->m,*ip,N,*ailen = a->ilen;
1492   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
1493   MatScalar  *aa = a->a,*ap;
1494 
1495   PetscFunctionBegin;
1496   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1497 
1498   if (m) rmax = ailen[0];
1499   for (i=1; i<mbs; i++) {
1500     /* move each row back by the amount of empty slots (fshift) before it*/
1501     fshift += imax[i-1] - ailen[i-1];
1502     rmax   = PetscMax(rmax,ailen[i]);
1503     if (fshift) {
1504       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1505       N = ailen[i];
1506       for (j=0; j<N; j++) {
1507         ip[j-fshift] = ip[j];
1508         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1509       }
1510     }
1511     ai[i] = ai[i-1] + ailen[i-1];
1512   }
1513   if (mbs) {
1514     fshift += imax[mbs-1] - ailen[mbs-1];
1515     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1516   }
1517   /* reset ilen and imax for each row */
1518   for (i=0; i<mbs; i++) {
1519     ailen[i] = imax[i] = ai[i+1] - ai[i];
1520   }
1521   a->nz = ai[mbs];
1522 
1523   /* diagonals may have moved, so kill the diagonal pointers */
1524   a->idiagvalid = PETSC_FALSE;
1525   if (fshift && a->diag) {
1526     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1527     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1528     a->diag = 0;
1529   }
1530   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1531   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1532   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1533   a->reallocs          = 0;
1534   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1535 
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 
1540 
1541 /*
1542    This function returns an array of flags which indicate the locations of contiguous
1543    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1544    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1545    Assume: sizes should be long enough to hold all the values.
1546 */
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1549 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1550 {
1551   int        i,j,k,row;
1552   PetscTruth flg;
1553 
1554   PetscFunctionBegin;
1555   for (i=0,j=0; i<n; j++) {
1556     row = idx[i];
1557     if (row%bs!=0) { /* Not the begining of a block */
1558       sizes[j] = 1;
1559       i++;
1560     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1561       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1562       i++;
1563     } else { /* Begining of the block, so check if the complete block exists */
1564       flg = PETSC_TRUE;
1565       for (k=1; k<bs; k++) {
1566         if (row+k != idx[i+k]) { /* break in the block */
1567           flg = PETSC_FALSE;
1568           break;
1569         }
1570       }
1571       if (flg == PETSC_TRUE) { /* No break in the bs */
1572         sizes[j] = bs;
1573         i+= bs;
1574       } else {
1575         sizes[j] = 1;
1576         i++;
1577       }
1578     }
1579   }
1580   *bs_max = j;
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1586 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1587 {
1588   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1589   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1590   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1591   PetscScalar zero = 0.0;
1592   MatScalar   *aa;
1593 
1594   PetscFunctionBegin;
1595   /* Make a copy of the IS and  sort it */
1596   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1597   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1598 
1599   /* allocate memory for rows,sizes */
1600   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1601   sizes = rows + is_n;
1602 
1603   /* copy IS values to rows, and sort them */
1604   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1605   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1606   if (baij->keepzeroedrows) {
1607     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1608     bs_max = is_n;
1609   } else {
1610     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1611   }
1612   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1613 
1614   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1615     row   = rows[j];
1616     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1617     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1618     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1619     if (sizes[i] == bs && !baij->keepzeroedrows) {
1620       if (diag) {
1621         if (baij->ilen[row/bs] > 0) {
1622           baij->ilen[row/bs]       = 1;
1623           baij->j[baij->i[row/bs]] = row/bs;
1624           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1625         }
1626         /* Now insert all the diagonal values for this bs */
1627         for (k=0; k<bs; k++) {
1628           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1629         }
1630       } else { /* (!diag) */
1631         baij->ilen[row/bs] = 0;
1632       } /* end (!diag) */
1633     } else { /* (sizes[i] != bs) */
1634 #if defined (PETSC_USE_DEBUG)
1635       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1636 #endif
1637       for (k=0; k<count; k++) {
1638         aa[0] =  zero;
1639         aa    += bs;
1640       }
1641       if (diag) {
1642         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1643       }
1644     }
1645   }
1646 
1647   ierr = PetscFree(rows);CHKERRQ(ierr);
1648   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1654 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1655 {
1656   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1657   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1658   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1659   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1660   int         ridx,cidx,bs2=a->bs2,ierr;
1661   PetscTruth  roworiented=a->roworiented;
1662   MatScalar   *ap,value,*aa=a->a,*bap;
1663 
1664   PetscFunctionBegin;
1665   for (k=0; k<m; k++) { /* loop over added rows */
1666     row  = im[k]; brow = row/bs;
1667     if (row < 0) continue;
1668 #if defined(PETSC_USE_BOPT_g)
1669     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1670 #endif
1671     rp   = aj + ai[brow];
1672     ap   = aa + bs2*ai[brow];
1673     rmax = imax[brow];
1674     nrow = ailen[brow];
1675     low  = 0;
1676     for (l=0; l<n; l++) { /* loop over added columns */
1677       if (in[l] < 0) continue;
1678 #if defined(PETSC_USE_BOPT_g)
1679       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1680 #endif
1681       col = in[l]; bcol = col/bs;
1682       ridx = row % bs; cidx = col % bs;
1683       if (roworiented) {
1684         value = v[l + k*n];
1685       } else {
1686         value = v[k + l*m];
1687       }
1688       if (!sorted) low = 0; high = nrow;
1689       while (high-low > 7) {
1690         t = (low+high)/2;
1691         if (rp[t] > bcol) high = t;
1692         else              low  = t;
1693       }
1694       for (i=low; i<high; i++) {
1695         if (rp[i] > bcol) break;
1696         if (rp[i] == bcol) {
1697           bap  = ap +  bs2*i + bs*cidx + ridx;
1698           if (is == ADD_VALUES) *bap += value;
1699           else                  *bap  = value;
1700           goto noinsert1;
1701         }
1702       }
1703       if (nonew == 1) goto noinsert1;
1704       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1705       if (nrow >= rmax) {
1706         /* there is no extra room in row, therefore enlarge */
1707         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1708         MatScalar *new_a;
1709 
1710         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1711 
1712         /* Malloc new storage space */
1713         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1714 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1715         new_j   = (int*)(new_a + bs2*new_nz);
1716         new_i   = new_j + new_nz;
1717 
1718         /* copy over old data into new slots */
1719         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1720         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1721         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1722         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1723         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1724         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1725         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1726         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1727         /* free up old matrix storage */
1728         ierr = PetscFree(a->a);CHKERRQ(ierr);
1729         if (!a->singlemalloc) {
1730           ierr = PetscFree(a->i);CHKERRQ(ierr);
1731           ierr = PetscFree(a->j);CHKERRQ(ierr);
1732         }
1733         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1734         a->singlemalloc = PETSC_TRUE;
1735 
1736         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1737         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1738         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1739         a->maxnz += bs2*CHUNKSIZE;
1740         a->reallocs++;
1741         a->nz++;
1742       }
1743       N = nrow++ - 1;
1744       /* shift up all the later entries in this row */
1745       for (ii=N; ii>=i; ii--) {
1746         rp[ii+1] = rp[ii];
1747         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1748       }
1749       if (N>=i) {
1750         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1751       }
1752       rp[i]                      = bcol;
1753       ap[bs2*i + bs*cidx + ridx] = value;
1754       noinsert1:;
1755       low = i;
1756     }
1757     ailen[brow] = nrow;
1758   }
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1765 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1766 {
1767   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1768   Mat         outA;
1769   int         ierr;
1770   PetscTruth  row_identity,col_identity;
1771 
1772   PetscFunctionBegin;
1773   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1774   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1775   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1776   if (!row_identity || !col_identity) {
1777     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1778   }
1779 
1780   outA          = inA;
1781   inA->factor   = FACTOR_LU;
1782 
1783   if (!a->diag) {
1784     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1785   }
1786 
1787   a->row        = row;
1788   a->col        = col;
1789   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1790   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1791 
1792   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1793   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1794   PetscLogObjectParent(inA,a->icol);
1795 
1796   /*
1797       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1798       for ILU(0) factorization with natural ordering
1799   */
1800   if (a->bs < 8) {
1801     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1802   } else {
1803     if (!a->solve_work) {
1804       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1805       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1806     }
1807   }
1808 
1809   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1810 
1811   PetscFunctionReturn(0);
1812 }
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1815 int MatPrintHelp_SeqBAIJ(Mat A)
1816 {
1817   static PetscTruth called = PETSC_FALSE;
1818   MPI_Comm          comm = A->comm;
1819   int               ierr;
1820 
1821   PetscFunctionBegin;
1822   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1823   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1824   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 EXTERN_C_BEGIN
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1831 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1832 {
1833   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1834   int         i,nz,nbs;
1835 
1836   PetscFunctionBegin;
1837   nz  = baij->maxnz/baij->bs2;
1838   nbs = baij->nbs;
1839   for (i=0; i<nz; i++) {
1840     baij->j[i] = indices[i];
1841   }
1842   baij->nz = nz;
1843   for (i=0; i<nbs; i++) {
1844     baij->ilen[i] = baij->imax[i];
1845   }
1846 
1847   PetscFunctionReturn(0);
1848 }
1849 EXTERN_C_END
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1853 /*@
1854     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1855        in the matrix.
1856 
1857   Input Parameters:
1858 +  mat - the SeqBAIJ matrix
1859 -  indices - the column indices
1860 
1861   Level: advanced
1862 
1863   Notes:
1864     This can be called if you have precomputed the nonzero structure of the
1865   matrix and want to provide it to the matrix object to improve the performance
1866   of the MatSetValues() operation.
1867 
1868     You MUST have set the correct numbers of nonzeros per row in the call to
1869   MatCreateSeqBAIJ().
1870 
1871     MUST be called before any calls to MatSetValues();
1872 
1873 @*/
1874 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1875 {
1876   int ierr,(*f)(Mat,int *);
1877 
1878   PetscFunctionBegin;
1879   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1880   PetscValidPointer(indices,2);
1881   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1882   if (f) {
1883     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1884   } else {
1885     SETERRQ(1,"Wrong type of matrix to set column indices");
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1892 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1893 {
1894   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1895   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1896   PetscReal    atmp;
1897   PetscScalar  *x,zero = 0.0;
1898   MatScalar    *aa;
1899   int          ncols,brow,krow,kcol;
1900 
1901   PetscFunctionBegin;
1902   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1903   bs   = a->bs;
1904   aa   = a->a;
1905   ai   = a->i;
1906   aj   = a->j;
1907   mbs = a->mbs;
1908 
1909   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1910   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1911   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1912   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1913   for (i=0; i<mbs; i++) {
1914     ncols = ai[1] - ai[0]; ai++;
1915     brow  = bs*i;
1916     for (j=0; j<ncols; j++){
1917       /* bcol = bs*(*aj); */
1918       for (kcol=0; kcol<bs; kcol++){
1919         for (krow=0; krow<bs; krow++){
1920           atmp = PetscAbsScalar(*aa); aa++;
1921           row = brow + krow;    /* row index */
1922           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1923           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1924         }
1925       }
1926       aj++;
1927     }
1928   }
1929   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1935 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1936 {
1937   int        ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1946 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1947 {
1948   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1949   PetscFunctionBegin;
1950   *array = a->a;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1956 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1957 {
1958   PetscFunctionBegin;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #include "petscblaslapack.h"
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1965 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1966 {
1967   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1968   int          ierr,one=1,i,bs=y->bs,j,bs2;
1969 
1970   PetscFunctionBegin;
1971   if (str == SAME_NONZERO_PATTERN) {
1972     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1973   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1974     if (y->xtoy && y->XtoY != X) {
1975       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1976       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1977     }
1978     if (!y->xtoy) { /* get xtoy */
1979       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1980       y->XtoY = X;
1981     }
1982     bs2 = bs*bs;
1983     for (i=0; i<x->nz; i++) {
1984       j = 0;
1985       while (j < bs2){
1986         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1987         j++;
1988       }
1989     }
1990     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1991   } else {
1992     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 /* -------------------------------------------------------------------*/
1998 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1999        MatGetRow_SeqBAIJ,
2000        MatRestoreRow_SeqBAIJ,
2001        MatMult_SeqBAIJ_N,
2002 /* 4*/ MatMultAdd_SeqBAIJ_N,
2003        MatMultTranspose_SeqBAIJ,
2004        MatMultTransposeAdd_SeqBAIJ,
2005        MatSolve_SeqBAIJ_N,
2006        0,
2007        0,
2008 /*10*/ 0,
2009        MatLUFactor_SeqBAIJ,
2010        0,
2011        0,
2012        MatTranspose_SeqBAIJ,
2013 /*15*/ MatGetInfo_SeqBAIJ,
2014        MatEqual_SeqBAIJ,
2015        MatGetDiagonal_SeqBAIJ,
2016        MatDiagonalScale_SeqBAIJ,
2017        MatNorm_SeqBAIJ,
2018 /*20*/ 0,
2019        MatAssemblyEnd_SeqBAIJ,
2020        0,
2021        MatSetOption_SeqBAIJ,
2022        MatZeroEntries_SeqBAIJ,
2023 /*25*/ MatZeroRows_SeqBAIJ,
2024        MatLUFactorSymbolic_SeqBAIJ,
2025        MatLUFactorNumeric_SeqBAIJ_N,
2026        0,
2027        0,
2028 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2029        MatILUFactorSymbolic_SeqBAIJ,
2030        0,
2031        MatGetArray_SeqBAIJ,
2032        MatRestoreArray_SeqBAIJ,
2033 /*35*/ MatDuplicate_SeqBAIJ,
2034        0,
2035        0,
2036        MatILUFactor_SeqBAIJ,
2037        0,
2038 /*40*/ MatAXPY_SeqBAIJ,
2039        MatGetSubMatrices_SeqBAIJ,
2040        MatIncreaseOverlap_SeqBAIJ,
2041        MatGetValues_SeqBAIJ,
2042        0,
2043 /*45*/ MatPrintHelp_SeqBAIJ,
2044        MatScale_SeqBAIJ,
2045        0,
2046        0,
2047        0,
2048 /*50*/ MatGetBlockSize_SeqBAIJ,
2049        MatGetRowIJ_SeqBAIJ,
2050        MatRestoreRowIJ_SeqBAIJ,
2051        0,
2052        0,
2053 /*55*/ 0,
2054        0,
2055        0,
2056        0,
2057        MatSetValuesBlocked_SeqBAIJ,
2058 /*60*/ MatGetSubMatrix_SeqBAIJ,
2059        MatDestroy_SeqBAIJ,
2060        MatView_SeqBAIJ,
2061        MatGetPetscMaps_Petsc,
2062        0,
2063 /*65*/ 0,
2064        0,
2065        0,
2066        0,
2067        0,
2068 /*70*/ MatGetRowMax_SeqBAIJ,
2069        MatConvert_Basic,
2070        0,
2071        0,
2072        0,
2073 /*75*/ 0,
2074        0,
2075        0,
2076        0,
2077        0,
2078 /*80*/ 0,
2079        0,
2080        0,
2081        0,
2082 /*85*/ MatLoad_SeqBAIJ,
2083        0,
2084        0,
2085        0,
2086        0
2087 };
2088 
2089 EXTERN_C_BEGIN
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2092 int MatStoreValues_SeqBAIJ(Mat mat)
2093 {
2094   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2095   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2096   int         ierr;
2097 
2098   PetscFunctionBegin;
2099   if (aij->nonew != 1) {
2100     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2101   }
2102 
2103   /* allocate space for values if not already there */
2104   if (!aij->saved_values) {
2105     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2106   }
2107 
2108   /* copy values over */
2109   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 EXTERN_C_END
2113 
2114 EXTERN_C_BEGIN
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2117 int MatRetrieveValues_SeqBAIJ(Mat mat)
2118 {
2119   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2120   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
2121 
2122   PetscFunctionBegin;
2123   if (aij->nonew != 1) {
2124     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2125   }
2126   if (!aij->saved_values) {
2127     SETERRQ(1,"Must call MatStoreValues(A);first");
2128   }
2129 
2130   /* copy values over */
2131   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 EXTERN_C_END
2135 
2136 EXTERN_C_BEGIN
2137 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2138 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2139 EXTERN_C_END
2140 
2141 EXTERN_C_BEGIN
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2144 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
2145 {
2146   Mat_SeqBAIJ *b;
2147   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
2148   PetscTruth  flg;
2149 
2150   PetscFunctionBegin;
2151 
2152   B->preallocated = PETSC_TRUE;
2153   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2154   if (nnz && newbs != bs) {
2155     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
2156   }
2157   bs   = newbs;
2158 
2159   mbs  = B->m/bs;
2160   nbs  = B->n/bs;
2161   bs2  = bs*bs;
2162 
2163   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2164     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2165   }
2166 
2167   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2168   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2169   if (nnz) {
2170     for (i=0; i<mbs; i++) {
2171       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2172       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2173     }
2174   }
2175 
2176   b       = (Mat_SeqBAIJ*)B->data;
2177   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2178   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2179   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2180   if (!flg) {
2181     switch (bs) {
2182     case 1:
2183       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2184       B->ops->mult            = MatMult_SeqBAIJ_1;
2185       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2186       break;
2187     case 2:
2188       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2189       B->ops->mult            = MatMult_SeqBAIJ_2;
2190       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2191       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2192       break;
2193     case 3:
2194       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2195       B->ops->mult            = MatMult_SeqBAIJ_3;
2196       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2197       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2198       break;
2199     case 4:
2200       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2201       B->ops->mult            = MatMult_SeqBAIJ_4;
2202       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2203       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2204       break;
2205     case 5:
2206       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2207       B->ops->mult            = MatMult_SeqBAIJ_5;
2208       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2209       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2210       break;
2211     case 6:
2212       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2213       B->ops->mult            = MatMult_SeqBAIJ_6;
2214       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2215       break;
2216     case 7:
2217       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2218       B->ops->mult            = MatMult_SeqBAIJ_7;
2219       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2220       break;
2221     default:
2222       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2223       B->ops->mult            = MatMult_SeqBAIJ_N;
2224       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2225       break;
2226     }
2227   }
2228   b->bs      = bs;
2229   b->mbs     = mbs;
2230   b->nbs     = nbs;
2231   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2232   if (!nnz) {
2233     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2234     else if (nz <= 0)        nz = 1;
2235     for (i=0; i<mbs; i++) b->imax[i] = nz;
2236     nz = nz*mbs;
2237   } else {
2238     nz = 0;
2239     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2240   }
2241 
2242   /* allocate the matrix space */
2243   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2244   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2245   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2246   b->j  = (int*)(b->a + nz*bs2);
2247   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2248   b->i  = b->j + nz;
2249   b->singlemalloc = PETSC_TRUE;
2250 
2251   b->i[0] = 0;
2252   for (i=1; i<mbs+1; i++) {
2253     b->i[i] = b->i[i-1] + b->imax[i-1];
2254   }
2255 
2256   /* b->ilen will count nonzeros in each block row so far. */
2257   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2258   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2259   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2260 
2261   b->bs               = bs;
2262   b->bs2              = bs2;
2263   b->mbs              = mbs;
2264   b->nz               = 0;
2265   b->maxnz            = nz*bs2;
2266   B->info.nz_unneeded = (PetscReal)b->maxnz;
2267   PetscFunctionReturn(0);
2268 }
2269 EXTERN_C_END
2270 
2271 /*MC
2272    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2273    block sparse compressed row format.
2274 
2275    Options Database Keys:
2276 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2277 
2278   Level: beginner
2279 
2280 .seealso: MatCreateSeqBAIJ
2281 M*/
2282 
2283 EXTERN_C_BEGIN
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatCreate_SeqBAIJ"
2286 int MatCreate_SeqBAIJ(Mat B)
2287 {
2288   int         ierr,size;
2289   Mat_SeqBAIJ *b;
2290 
2291   PetscFunctionBegin;
2292   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2293   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2294 
2295   B->m = B->M = PetscMax(B->m,B->M);
2296   B->n = B->N = PetscMax(B->n,B->N);
2297   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2298   B->data = (void*)b;
2299   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2300   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2301   B->factor           = 0;
2302   B->lupivotthreshold = 1.0;
2303   B->mapping          = 0;
2304   b->row              = 0;
2305   b->col              = 0;
2306   b->icol             = 0;
2307   b->reallocs         = 0;
2308   b->saved_values     = 0;
2309 #if defined(PETSC_USE_MAT_SINGLE)
2310   b->setvalueslen     = 0;
2311   b->setvaluescopy    = PETSC_NULL;
2312 #endif
2313 
2314   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2315   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2316 
2317   b->sorted           = PETSC_FALSE;
2318   b->roworiented      = PETSC_TRUE;
2319   b->nonew            = 0;
2320   b->diag             = 0;
2321   b->solve_work       = 0;
2322   b->mult_work        = 0;
2323   B->spptr            = 0;
2324   B->info.nz_unneeded = (PetscReal)b->maxnz;
2325   b->keepzeroedrows   = PETSC_FALSE;
2326   b->xtoy              = 0;
2327   b->XtoY              = 0;
2328 
2329   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2330                                      "MatStoreValues_SeqBAIJ",
2331                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2333                                      "MatRetrieveValues_SeqBAIJ",
2334                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2335   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2336                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2337                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2338   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2339                                      "MatConvert_SeqBAIJ_SeqAIJ",
2340                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2342                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2343                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2345                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2346                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2347   PetscFunctionReturn(0);
2348 }
2349 EXTERN_C_END
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2353 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2354 {
2355   Mat         C;
2356   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2357   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
2358 
2359   PetscFunctionBegin;
2360   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2361 
2362   *B = 0;
2363   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2364   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2365   c    = (Mat_SeqBAIJ*)C->data;
2366 
2367   C->M   = A->M;
2368   C->N   = A->N;
2369   c->bs  = a->bs;
2370   c->bs2 = a->bs2;
2371   c->mbs = a->mbs;
2372   c->nbs = a->nbs;
2373 
2374   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
2375   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
2376   for (i=0; i<mbs; i++) {
2377     c->imax[i] = a->imax[i];
2378     c->ilen[i] = a->ilen[i];
2379   }
2380 
2381   /* allocate the matrix space */
2382   c->singlemalloc = PETSC_TRUE;
2383   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
2384   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2385   c->j = (int*)(c->a + nz*bs2);
2386   c->i = c->j + nz;
2387   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
2388   if (mbs > 0) {
2389     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
2390     if (cpvalues == MAT_COPY_VALUES) {
2391       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2392     } else {
2393       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2394     }
2395   }
2396 
2397   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2398   c->sorted      = a->sorted;
2399   c->roworiented = a->roworiented;
2400   c->nonew       = a->nonew;
2401 
2402   if (a->diag) {
2403     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
2404     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
2405     for (i=0; i<mbs; i++) {
2406       c->diag[i] = a->diag[i];
2407     }
2408   } else c->diag        = 0;
2409   c->nz                 = a->nz;
2410   c->maxnz              = a->maxnz;
2411   c->solve_work         = 0;
2412   c->mult_work          = 0;
2413   C->preallocated       = PETSC_TRUE;
2414   C->assembled          = PETSC_TRUE;
2415   *B = C;
2416   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatLoad_SeqBAIJ"
2422 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2423 {
2424   Mat_SeqBAIJ  *a;
2425   Mat          B;
2426   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2427   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2428   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2429   int          *masked,nmask,tmp,bs2,ishift;
2430   PetscScalar  *aa;
2431   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2435   bs2  = bs*bs;
2436 
2437   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2438   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2439   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2440   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2441   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2442   M = header[1]; N = header[2]; nz = header[3];
2443 
2444   if (header[3] < 0) {
2445     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2446   }
2447 
2448   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2449 
2450   /*
2451      This code adds extra rows to make sure the number of rows is
2452     divisible by the blocksize
2453   */
2454   mbs        = M/bs;
2455   extra_rows = bs - M + bs*(mbs);
2456   if (extra_rows == bs) extra_rows = 0;
2457   else                  mbs++;
2458   if (extra_rows) {
2459     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2460   }
2461 
2462   /* read in row lengths */
2463   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2464   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2465   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2466 
2467   /* read in column indices */
2468   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
2469   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2470   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2471 
2472   /* loop over row lengths determining block row lengths */
2473   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
2474   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
2475   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
2476   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
2477   masked   = mask + mbs;
2478   rowcount = 0; nzcount = 0;
2479   for (i=0; i<mbs; i++) {
2480     nmask = 0;
2481     for (j=0; j<bs; j++) {
2482       kmax = rowlengths[rowcount];
2483       for (k=0; k<kmax; k++) {
2484         tmp = jj[nzcount++]/bs;
2485         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2486       }
2487       rowcount++;
2488     }
2489     browlengths[i] += nmask;
2490     /* zero out the mask elements we set */
2491     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2492   }
2493 
2494   /* create our matrix */
2495   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2496   ierr = MatSetType(B,type);CHKERRQ(ierr);
2497   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2498   a = (Mat_SeqBAIJ*)B->data;
2499 
2500   /* set matrix "i" values */
2501   a->i[0] = 0;
2502   for (i=1; i<= mbs; i++) {
2503     a->i[i]      = a->i[i-1] + browlengths[i-1];
2504     a->ilen[i-1] = browlengths[i-1];
2505   }
2506   a->nz         = 0;
2507   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2508 
2509   /* read in nonzero values */
2510   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2511   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2512   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2513 
2514   /* set "a" and "j" values into matrix */
2515   nzcount = 0; jcount = 0;
2516   for (i=0; i<mbs; i++) {
2517     nzcountb = nzcount;
2518     nmask    = 0;
2519     for (j=0; j<bs; j++) {
2520       kmax = rowlengths[i*bs+j];
2521       for (k=0; k<kmax; k++) {
2522         tmp = jj[nzcount++]/bs;
2523 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2524       }
2525     }
2526     /* sort the masked values */
2527     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2528 
2529     /* set "j" values into matrix */
2530     maskcount = 1;
2531     for (j=0; j<nmask; j++) {
2532       a->j[jcount++]  = masked[j];
2533       mask[masked[j]] = maskcount++;
2534     }
2535     /* set "a" values into matrix */
2536     ishift = bs2*a->i[i];
2537     for (j=0; j<bs; j++) {
2538       kmax = rowlengths[i*bs+j];
2539       for (k=0; k<kmax; k++) {
2540         tmp       = jj[nzcountb]/bs ;
2541         block     = mask[tmp] - 1;
2542         point     = jj[nzcountb] - bs*tmp;
2543         idx       = ishift + bs2*block + j + bs*point;
2544         a->a[idx] = (MatScalar)aa[nzcountb++];
2545       }
2546     }
2547     /* zero out the mask elements we set */
2548     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2549   }
2550   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2551 
2552   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2553   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2554   ierr = PetscFree(aa);CHKERRQ(ierr);
2555   ierr = PetscFree(jj);CHKERRQ(ierr);
2556   ierr = PetscFree(mask);CHKERRQ(ierr);
2557 
2558   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2559   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2560   ierr = MatView_Private(B);CHKERRQ(ierr);
2561 
2562   *A = B;
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatCreateSeqBAIJ"
2568 /*@C
2569    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2570    compressed row) format.  For good matrix assembly performance the
2571    user should preallocate the matrix storage by setting the parameter nz
2572    (or the array nnz).  By setting these parameters accurately, performance
2573    during matrix assembly can be increased by more than a factor of 50.
2574 
2575    Collective on MPI_Comm
2576 
2577    Input Parameters:
2578 +  comm - MPI communicator, set to PETSC_COMM_SELF
2579 .  bs - size of block
2580 .  m - number of rows
2581 .  n - number of columns
2582 .  nz - number of nonzero blocks  per block row (same for all rows)
2583 -  nnz - array containing the number of nonzero blocks in the various block rows
2584          (possibly different for each block row) or PETSC_NULL
2585 
2586    Output Parameter:
2587 .  A - the matrix
2588 
2589    Options Database Keys:
2590 .   -mat_no_unroll - uses code that does not unroll the loops in the
2591                      block calculations (much slower)
2592 .    -mat_block_size - size of the blocks to use
2593 
2594    Level: intermediate
2595 
2596    Notes:
2597    A nonzero block is any block that as 1 or more nonzeros in it
2598 
2599    The block AIJ format is fully compatible with standard Fortran 77
2600    storage.  That is, the stored row and column indices can begin at
2601    either one (as in Fortran) or zero.  See the users' manual for details.
2602 
2603    Specify the preallocated storage with either nz or nnz (not both).
2604    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2605    allocation.  For additional details, see the users manual chapter on
2606    matrices.
2607 
2608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2609 @*/
2610 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2611 {
2612   int ierr;
2613 
2614   PetscFunctionBegin;
2615   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2616   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2617   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 #undef __FUNCT__
2622 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2623 /*@C
2624    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2625    per row in the matrix. For good matrix assembly performance the
2626    user should preallocate the matrix storage by setting the parameter nz
2627    (or the array nnz).  By setting these parameters accurately, performance
2628    during matrix assembly can be increased by more than a factor of 50.
2629 
2630    Collective on MPI_Comm
2631 
2632    Input Parameters:
2633 +  A - the matrix
2634 .  bs - size of block
2635 .  nz - number of block nonzeros per block row (same for all rows)
2636 -  nnz - array containing the number of block nonzeros in the various block rows
2637          (possibly different for each block row) or PETSC_NULL
2638 
2639    Options Database Keys:
2640 .   -mat_no_unroll - uses code that does not unroll the loops in the
2641                      block calculations (much slower)
2642 .    -mat_block_size - size of the blocks to use
2643 
2644    Level: intermediate
2645 
2646    Notes:
2647    The block AIJ format is fully compatible with standard Fortran 77
2648    storage.  That is, the stored row and column indices can begin at
2649    either one (as in Fortran) or zero.  See the users' manual for details.
2650 
2651    Specify the preallocated storage with either nz or nnz (not both).
2652    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2653    allocation.  For additional details, see the users manual chapter on
2654    matrices.
2655 
2656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2657 @*/
2658 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2659 {
2660   int ierr,(*f)(Mat,int,int,const int[]);
2661 
2662   PetscFunctionBegin;
2663   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2664   if (f) {
2665     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670