xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cb2b73ccb40d913b468b2548828398eabfd08a8e)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
124   PetscErrorCode     ierr;
125   PetscInt           m = a->mbs,i,i2,nz,idx;
126   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
127 
128   PetscFunctionBegin;
129   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
130   its = its*lits;
131   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
132   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
133   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
134   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
135   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
136 
137   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
138 
139   diag  = a->diag;
140   idiag = a->idiag;
141   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
142   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
143 
144   if (flag & SOR_ZERO_INITIAL_GUESS) {
145     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
146       x[0] = b[0]*idiag[0];
147       i2     = 1;
148       idiag += 1;
149       for (i=1; i<m; i++) {
150         v     = aa + ai[i];
151         vi    = aj + ai[i];
152         nz    = diag[i] - ai[i];
153         s1    = b[i2];
154         while (nz--) {
155           idx  = (*vi++);
156           x1   = x[idx];
157           s1  -= v[0]*x1;
158           v   += 1;
159         }
160         x[i2]   = idiag[0]*s1;
161         idiag   += 1;
162         i2      += 1;
163       }
164       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
165       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
166     }
167     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
168         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
169       i2    = 0;
170       mdiag = a->idiag+a->mbs;
171       for (i=0; i<m; i++) {
172         x1      = x[i2];
173         x[i2]   = mdiag[0]*x1;
174         mdiag  += 1;
175         i2     += 1;
176       }
177       ierr = PetscLogFlops(m);CHKERRQ(ierr);
178     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
179       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
180     }
181     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
182       idiag   = a->idiag+a->mbs - 1;
183       i2      = m - 1;
184       x1      = x[i2];
185       x[i2]   = idiag[0]*x1;
186       idiag -= 1;
187       i2    -= 1;
188       for (i=m-2; i>=0; i--) {
189         v     = aa + (diag[i]+1);
190         vi    = aj + diag[i] + 1;
191         nz    = ai[i+1] - diag[i] - 1;
192         s1    = x[i2];
193         while (nz--) {
194           idx  = (*vi++);
195           x1   = x[idx];
196           s1  -= v[0]*x1;
197           v   += 1;
198         }
199         x[i2]   = idiag[0]*s1;
200         idiag   -= 2;
201         i2      -= 1;
202       }
203       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
204     }
205   } else {
206     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
207   }
208   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
209   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
215 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
216 {
217   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
218   PetscScalar        *x,x1,x2,s1,s2;
219   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
220   PetscErrorCode     ierr;
221   PetscInt           m = a->mbs,i,i2,nz,idx;
222   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
223 
224   PetscFunctionBegin;
225   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
226   its = its*lits;
227   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
228   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
229   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
230   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
231   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
232 
233   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
234 
235   diag  = a->diag;
236   idiag = a->idiag;
237   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
238   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
239 
240   if (flag & SOR_ZERO_INITIAL_GUESS) {
241     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
242       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
243       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
244       i2     = 2;
245       idiag += 4;
246       for (i=1; i<m; i++) {
247 	v     = aa + 4*ai[i];
248 	vi    = aj + ai[i];
249 	nz    = diag[i] - ai[i];
250 	s1    = b[i2]; s2 = b[i2+1];
251 	while (nz--) {
252 	  idx  = 2*(*vi++);
253 	  x1   = x[idx]; x2 = x[1+idx];
254 	  s1  -= v[0]*x1 + v[2]*x2;
255 	  s2  -= v[1]*x1 + v[3]*x2;
256 	  v   += 4;
257 	}
258 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
259 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
260         idiag   += 4;
261         i2      += 2;
262       }
263       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
264       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
265     }
266     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
267         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
268       i2    = 0;
269       mdiag = a->idiag+4*a->mbs;
270       for (i=0; i<m; i++) {
271         x1      = x[i2]; x2 = x[i2+1];
272         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
273         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
274         mdiag  += 4;
275         i2     += 2;
276       }
277       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
278     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
279       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
280     }
281     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
282       idiag   = a->idiag+4*a->mbs - 4;
283       i2      = 2*m - 2;
284       x1      = x[i2]; x2 = x[i2+1];
285       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
286       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
287       idiag -= 4;
288       i2    -= 2;
289       for (i=m-2; i>=0; i--) {
290 	v     = aa + 4*(diag[i]+1);
291 	vi    = aj + diag[i] + 1;
292 	nz    = ai[i+1] - diag[i] - 1;
293 	s1    = x[i2]; s2 = x[i2+1];
294 	while (nz--) {
295 	  idx  = 2*(*vi++);
296 	  x1   = x[idx]; x2 = x[1+idx];
297 	  s1  -= v[0]*x1 + v[2]*x2;
298 	  s2  -= v[1]*x1 + v[3]*x2;
299 	  v   += 4;
300 	}
301 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
302 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
303         idiag   -= 4;
304         i2      -= 2;
305       }
306       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
307     }
308   } else {
309     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
310   }
311   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
312   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNCT__
317 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
318 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
319 {
320   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
321   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
322   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
323   PetscErrorCode     ierr;
324   PetscInt           m = a->mbs,i,i2,nz,idx;
325   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
326 
327   PetscFunctionBegin;
328   its = its*lits;
329   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
330   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
331   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
332   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
333   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
334   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
335 
336   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
337 
338   diag  = a->diag;
339   idiag = a->idiag;
340   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
341   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
342 
343   if (flag & SOR_ZERO_INITIAL_GUESS) {
344     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
345       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
346       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
347       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
348       i2     = 3;
349       idiag += 9;
350       for (i=1; i<m; i++) {
351         v     = aa + 9*ai[i];
352         vi    = aj + ai[i];
353         nz    = diag[i] - ai[i];
354         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
355         while (nz--) {
356           idx  = 3*(*vi++);
357           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
358           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
359           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
360           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
361           v   += 9;
362         }
363         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
364         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
365         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
366         idiag   += 9;
367         i2      += 3;
368       }
369       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
370       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
371     }
372     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
373         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
374       i2    = 0;
375       mdiag = a->idiag+9*a->mbs;
376       for (i=0; i<m; i++) {
377         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
378         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
379         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
380         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
381         mdiag  += 9;
382         i2     += 3;
383       }
384       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
385     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
386       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
389       idiag   = a->idiag+9*a->mbs - 9;
390       i2      = 3*m - 3;
391       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
392       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
393       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
394       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
395       idiag -= 9;
396       i2    -= 3;
397       for (i=m-2; i>=0; i--) {
398         v     = aa + 9*(diag[i]+1);
399         vi    = aj + diag[i] + 1;
400         nz    = ai[i+1] - diag[i] - 1;
401         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
402         while (nz--) {
403           idx  = 3*(*vi++);
404           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
405           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
406           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
407           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
408           v   += 9;
409         }
410         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
411         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
412         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
413         idiag   -= 9;
414         i2      -= 3;
415       }
416       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
417     }
418   } else {
419     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
420   }
421   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
422   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
428 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
429 {
430   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
431   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
432   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
433   PetscErrorCode     ierr;
434   PetscInt           m = a->mbs,i,i2,nz,idx;
435   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
436 
437   PetscFunctionBegin;
438   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
439   its = its*lits;
440   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
441   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
442   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
443   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
444   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
445 
446   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
447 
448   diag  = a->diag;
449   idiag = a->idiag;
450   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
451   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
452 
453   if (flag & SOR_ZERO_INITIAL_GUESS) {
454     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
455       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
456       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
457       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
458       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
459       i2     = 4;
460       idiag += 16;
461       for (i=1; i<m; i++) {
462 	v     = aa + 16*ai[i];
463 	vi    = aj + ai[i];
464 	nz    = diag[i] - ai[i];
465 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
466 	while (nz--) {
467 	  idx  = 4*(*vi++);
468 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
469 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
470 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
471 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
472 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
473 	  v   += 16;
474 	}
475 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
476 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
477 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
478 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
479         idiag   += 16;
480         i2      += 4;
481       }
482       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
483       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
484     }
485     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
486         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
487       i2    = 0;
488       mdiag = a->idiag+16*a->mbs;
489       for (i=0; i<m; i++) {
490         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
491         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
492         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
493         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
494         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
495         mdiag  += 16;
496         i2     += 4;
497       }
498       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
499     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
500       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
501     }
502     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
503       idiag   = a->idiag+16*a->mbs - 16;
504       i2      = 4*m - 4;
505       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
506       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
507       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
508       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
509       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
510       idiag -= 16;
511       i2    -= 4;
512       for (i=m-2; i>=0; i--) {
513 	v     = aa + 16*(diag[i]+1);
514 	vi    = aj + diag[i] + 1;
515 	nz    = ai[i+1] - diag[i] - 1;
516 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
517 	while (nz--) {
518 	  idx  = 4*(*vi++);
519 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
520 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
521 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
522 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
523 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
524 	  v   += 16;
525 	}
526 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
527 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
528 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
529 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
530         idiag   -= 16;
531         i2      -= 4;
532       }
533       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
534     }
535   } else {
536     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
537   }
538   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
539   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
545 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
546 {
547   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
548   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
549   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
550   PetscErrorCode     ierr;
551   PetscInt           m = a->mbs,i,i2,nz,idx;
552   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
553 
554   PetscFunctionBegin;
555   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
556   its = its*lits;
557   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
558   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
559   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
560   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
561   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
562 
563   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
564 
565   diag  = a->diag;
566   idiag = a->idiag;
567   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
568   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
569 
570   if (flag & SOR_ZERO_INITIAL_GUESS) {
571     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
572       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
573       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
574       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
575       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
576       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
577       i2     = 5;
578       idiag += 25;
579       for (i=1; i<m; i++) {
580 	v     = aa + 25*ai[i];
581 	vi    = aj + ai[i];
582 	nz    = diag[i] - ai[i];
583 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
584 	while (nz--) {
585 	  idx  = 5*(*vi++);
586 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
587 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
588 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
589 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
590 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
591 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
592 	  v   += 25;
593 	}
594 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
595 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
596 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
597 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
598 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
599         idiag   += 25;
600         i2      += 5;
601       }
602       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
603       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
604     }
605     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
606         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
607       i2    = 0;
608       mdiag = a->idiag+25*a->mbs;
609       for (i=0; i<m; i++) {
610         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
611         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
612         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
613         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
614         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
615         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
616         mdiag  += 25;
617         i2     += 5;
618       }
619       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
620     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
621       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
622     }
623     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
624       idiag   = a->idiag+25*a->mbs - 25;
625       i2      = 5*m - 5;
626       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
627       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
628       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
629       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
630       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
631       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
632       idiag -= 25;
633       i2    -= 5;
634       for (i=m-2; i>=0; i--) {
635 	v     = aa + 25*(diag[i]+1);
636 	vi    = aj + diag[i] + 1;
637 	nz    = ai[i+1] - diag[i] - 1;
638 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
639 	while (nz--) {
640 	  idx  = 5*(*vi++);
641 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
642 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
643 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
644 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
645 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
646 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
647 	  v   += 25;
648 	}
649 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
650 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
651 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
652 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
653 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
654         idiag   -= 25;
655         i2      -= 5;
656       }
657       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
658     }
659   } else {
660     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
661   }
662   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
663   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
669 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
670 {
671   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
672   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
673   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
674   PetscErrorCode     ierr;
675   PetscInt           m = a->mbs,i,i2,nz,idx;
676   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
677 
678   PetscFunctionBegin;
679   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
680   its = its*lits;
681   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
682   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
683   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
684   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
685   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
686 
687   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
688 
689   diag  = a->diag;
690   idiag = a->idiag;
691   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
692   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
693 
694   if (flag & SOR_ZERO_INITIAL_GUESS) {
695     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
696       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
697       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
698       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
699       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
700       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
701       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
702       i2     = 6;
703       idiag += 36;
704       for (i=1; i<m; i++) {
705         v     = aa + 36*ai[i];
706         vi    = aj + ai[i];
707         nz    = diag[i] - ai[i];
708         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
709         while (nz--) {
710           idx  = 6*(*vi++);
711           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
712           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
713           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
714           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
715           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
716           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
717           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
718           v   += 36;
719         }
720         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
721         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
722         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
723         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
724         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
725         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
726         idiag   += 36;
727         i2      += 6;
728       }
729       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
730       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
731     }
732     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
733         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
734       i2    = 0;
735       mdiag = a->idiag+36*a->mbs;
736       for (i=0; i<m; i++) {
737         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
738         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
739         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
740         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
741         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
742         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
743         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
744         mdiag  += 36;
745         i2     += 6;
746       }
747       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
748     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
749       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
750     }
751     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
752       idiag   = a->idiag+36*a->mbs - 36;
753       i2      = 6*m - 6;
754       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
755       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
756       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
757       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
758       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
759       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
760       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
761       idiag -= 36;
762       i2    -= 6;
763       for (i=m-2; i>=0; i--) {
764         v     = aa + 36*(diag[i]+1);
765         vi    = aj + diag[i] + 1;
766         nz    = ai[i+1] - diag[i] - 1;
767         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
768         while (nz--) {
769           idx  = 6*(*vi++);
770           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
771           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
772           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
773           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
774           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
775           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
776           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
777           v   += 36;
778         }
779         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
780         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
781         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
782         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
783         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
784         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
785         idiag   -= 36;
786         i2      -= 6;
787       }
788       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
789     }
790   } else {
791     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
792   }
793   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
794   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
800 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
801 {
802   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
803   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
804   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
805   PetscErrorCode     ierr;
806   PetscInt           m = a->mbs,i,i2,nz,idx;
807   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
808 
809   PetscFunctionBegin;
810   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
811   its = its*lits;
812   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
813   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
814   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
815   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
816   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
817 
818   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
819 
820   diag  = a->diag;
821   idiag = a->idiag;
822   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
823   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
824 
825   if (flag & SOR_ZERO_INITIAL_GUESS) {
826     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
827       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
828       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
829       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
830       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
831       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
832       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
833       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
834       i2     = 7;
835       idiag += 49;
836       for (i=1; i<m; i++) {
837         v     = aa + 49*ai[i];
838         vi    = aj + ai[i];
839         nz    = diag[i] - ai[i];
840         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
841         while (nz--) {
842           idx  = 7*(*vi++);
843           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
844           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
845           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
846           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
847           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
848           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
849           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
850           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
851           v   += 49;
852         }
853         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
854         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
855         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
856         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
857         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
858         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
859         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
860         idiag   += 49;
861         i2      += 7;
862       }
863       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
864       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
865     }
866     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
867         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
868       i2    = 0;
869       mdiag = a->idiag+49*a->mbs;
870       for (i=0; i<m; i++) {
871         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
872         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
873         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
874         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
875         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
876         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
877         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
878         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
879         mdiag  += 36;
880         i2     += 6;
881       }
882       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
883     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
884       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
885     }
886     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
887       idiag   = a->idiag+49*a->mbs - 49;
888       i2      = 7*m - 7;
889       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
890       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
891       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
892       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
893       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
894       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
895       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
896       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
897       idiag -= 49;
898       i2    -= 7;
899       for (i=m-2; i>=0; i--) {
900         v     = aa + 49*(diag[i]+1);
901         vi    = aj + diag[i] + 1;
902         nz    = ai[i+1] - diag[i] - 1;
903         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
904         while (nz--) {
905           idx  = 7*(*vi++);
906           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
907           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
908           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
909           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
910           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
911           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
912           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
913           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
914           v   += 49;
915         }
916         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
917         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
918         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
919         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
920         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
921         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
922         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
923         idiag   -= 49;
924         i2      -= 7;
925       }
926       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
927     }
928   } else {
929     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
930   }
931   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
932   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
933   PetscFunctionReturn(0);
934 }
935 
936 /*
937     Special version for direct calls from Fortran (Used in PETSc-fun3d)
938 */
939 #if defined(PETSC_HAVE_FORTRAN_CAPS)
940 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
941 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
942 #define matsetvaluesblocked4_ matsetvaluesblocked4
943 #endif
944 
945 EXTERN_C_BEGIN
946 #undef __FUNCT__
947 #define __FUNCT__ "matsetvaluesblocked4_"
948 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
949 {
950   Mat               A = *AA;
951   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
952   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
953   PetscInt          *ai=a->i,*ailen=a->ilen;
954   PetscInt          *aj=a->j,stepval,lastcol = -1;
955   const PetscScalar *value = v;
956   MatScalar         *ap,*aa = a->a,*bap;
957 
958   PetscFunctionBegin;
959   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
960   stepval = (n-1)*4;
961   for (k=0; k<m; k++) { /* loop over added rows */
962     row  = im[k];
963     rp   = aj + ai[row];
964     ap   = aa + 16*ai[row];
965     nrow = ailen[row];
966     low  = 0;
967     high = nrow;
968     for (l=0; l<n; l++) { /* loop over added columns */
969       col = in[l];
970       if (col <= lastcol) low = 0; else high = nrow;
971       lastcol = col;
972       value = v + k*(stepval+4 + l)*4;
973       while (high-low > 7) {
974         t = (low+high)/2;
975         if (rp[t] > col) high = t;
976         else             low  = t;
977       }
978       for (i=low; i<high; i++) {
979         if (rp[i] > col) break;
980         if (rp[i] == col) {
981           bap  = ap +  16*i;
982           for (ii=0; ii<4; ii++,value+=stepval) {
983             for (jj=ii; jj<16; jj+=4) {
984               bap[jj] += *value++;
985             }
986           }
987           goto noinsert2;
988         }
989       }
990       N = nrow++ - 1;
991       high++; /* added new column index thus must search to one higher than before */
992       /* shift up all the later entries in this row */
993       for (ii=N; ii>=i; ii--) {
994         rp[ii+1] = rp[ii];
995         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
996       }
997       if (N >= i) {
998         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
999       }
1000       rp[i] = col;
1001       bap   = ap +  16*i;
1002       for (ii=0; ii<4; ii++,value+=stepval) {
1003         for (jj=ii; jj<16; jj+=4) {
1004           bap[jj] = *value++;
1005         }
1006       }
1007       noinsert2:;
1008       low = i;
1009     }
1010     ailen[row] = nrow;
1011   }
1012   PetscFunctionReturnVoid();
1013 }
1014 EXTERN_C_END
1015 
1016 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1017 #define matsetvalues4_ MATSETVALUES4
1018 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1019 #define matsetvalues4_ matsetvalues4
1020 #endif
1021 
1022 EXTERN_C_BEGIN
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatSetValues4_"
1025 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1026 {
1027   Mat         A = *AA;
1028   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1029   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1030   PetscInt    *ai=a->i,*ailen=a->ilen;
1031   PetscInt    *aj=a->j,brow,bcol;
1032   PetscInt    ridx,cidx,lastcol = -1;
1033   MatScalar   *ap,value,*aa=a->a,*bap;
1034 
1035   PetscFunctionBegin;
1036   for (k=0; k<m; k++) { /* loop over added rows */
1037     row  = im[k]; brow = row/4;
1038     rp   = aj + ai[brow];
1039     ap   = aa + 16*ai[brow];
1040     nrow = ailen[brow];
1041     low  = 0;
1042     high = nrow;
1043     for (l=0; l<n; l++) { /* loop over added columns */
1044       col = in[l]; bcol = col/4;
1045       ridx = row % 4; cidx = col % 4;
1046       value = v[l + k*n];
1047       if (col <= lastcol) low = 0; else high = nrow;
1048       lastcol = col;
1049       while (high-low > 7) {
1050         t = (low+high)/2;
1051         if (rp[t] > bcol) high = t;
1052         else              low  = t;
1053       }
1054       for (i=low; i<high; i++) {
1055         if (rp[i] > bcol) break;
1056         if (rp[i] == bcol) {
1057           bap  = ap +  16*i + 4*cidx + ridx;
1058           *bap += value;
1059           goto noinsert1;
1060         }
1061       }
1062       N = nrow++ - 1;
1063       high++; /* added new column thus must search to one higher than before */
1064       /* shift up all the later entries in this row */
1065       for (ii=N; ii>=i; ii--) {
1066         rp[ii+1] = rp[ii];
1067         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1068       }
1069       if (N>=i) {
1070         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1071       }
1072       rp[i]                    = bcol;
1073       ap[16*i + 4*cidx + ridx] = value;
1074       noinsert1:;
1075       low = i;
1076     }
1077     ailen[brow] = nrow;
1078   }
1079   PetscFunctionReturnVoid();
1080 }
1081 EXTERN_C_END
1082 
1083 /*
1084      Checks for missing diagonals
1085 */
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1088 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1089 {
1090   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1091   PetscErrorCode ierr;
1092   PetscInt       *diag,*jj = a->j,i;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1096   *missing = PETSC_FALSE;
1097   diag     = a->diag;
1098   for (i=0; i<a->mbs; i++) {
1099     if (jj[diag[i]] != i) {
1100       *missing  = PETSC_TRUE;
1101       if (d) *d = i;
1102     }
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1109 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1110 {
1111   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113   PetscInt       i,j,m = a->mbs;
1114 
1115   PetscFunctionBegin;
1116   if (!a->diag) {
1117     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1118   }
1119   for (i=0; i<m; i++) {
1120     a->diag[i] = a->i[i+1];
1121     for (j=a->i[i]; j<a->i[i+1]; j++) {
1122       if (a->j[j] == i) {
1123         a->diag[i] = j;
1124         break;
1125       }
1126     }
1127   }
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 
1132 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1136 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1137 {
1138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1139   PetscErrorCode ierr;
1140   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1141   PetscInt       *tia, *tja;
1142 
1143   PetscFunctionBegin;
1144   *nn = n;
1145   if (!ia) PetscFunctionReturn(0);
1146   if (symmetric) {
1147     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1148   } else {
1149     tia = a->i; tja = a->j;
1150   }
1151 
1152   if (!blockcompressed && bs > 1) {
1153     (*nn) *= bs;
1154     nbs    = bs;
1155     /* malloc & create the natural set of indices */
1156     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1157     if (n) {
1158       (*ia)[0] = 0;
1159       for (j=1; j<bs; j++) {
1160         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1161       }
1162     }
1163 
1164     for (i=1; i<n; i++) {
1165       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1166       for (j=1; j<bs; j++) {
1167         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1168       }
1169     }
1170     if (n) {
1171       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1172     }
1173 
1174     if (ja) {
1175       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1176       cnt = 0;
1177       for (i=0; i<n; i++) {
1178         for (j=0; j<bs; j++) {
1179           for (k=tia[i]; k<tia[i+1]; k++) {
1180             for (l=0; l<bs; l++) {
1181               (*ja)[cnt++] = bs*tja[k] + l;
1182 	    }
1183           }
1184         }
1185       }
1186     }
1187 
1188     n     *= bs;
1189     nz *= bs*bs;
1190     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1191       ierr = PetscFree(tia);CHKERRQ(ierr);
1192       ierr = PetscFree(tja);CHKERRQ(ierr);
1193     }
1194   } else {
1195     *ia = tia;
1196     if (ja) *ja = tja;
1197   }
1198   if (oshift == 1) {
1199     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1200     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1201   }
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1207 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1208 {
1209   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1210   PetscErrorCode ierr;
1211   PetscInt       i,n = a->mbs,nz = a->i[n];
1212 
1213   PetscFunctionBegin;
1214   if (!ia) PetscFunctionReturn(0);
1215   if (!blockcompressed && A->rmap.bs > 1) {
1216     ierr = PetscFree(*ia);CHKERRQ(ierr);
1217     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1218   } else if (symmetric) {
1219     ierr = PetscFree(*ia);CHKERRQ(ierr);
1220     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1221   } else if (oshift == 1) { /* blockcompressed */
1222     for (i=0; i<n+1; i++) a->i[i]--;
1223     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1230 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1231 {
1232   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236 #if defined(PETSC_USE_LOG)
1237   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1238 #endif
1239   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1240   if (a->row) {
1241     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1242   }
1243   if (a->col) {
1244     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1245   }
1246   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1247   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1248   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1249   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1250   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1251   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1252   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1253   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1254   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1255 
1256   ierr = PetscFree(a);CHKERRQ(ierr);
1257 
1258   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1259   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1260   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1261   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1262   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1263   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1264   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1265   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1271 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1272 {
1273   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277   switch (op) {
1278   case MAT_ROW_ORIENTED:
1279     a->roworiented    = flg;
1280     break;
1281   case MAT_KEEP_ZEROED_ROWS:
1282     a->keepzeroedrows = flg;
1283     break;
1284   case MAT_NEW_NONZERO_LOCATIONS:
1285     a->nonew          = (flg ? 0 : 1);
1286     break;
1287   case MAT_NEW_NONZERO_LOCATION_ERR:
1288     a->nonew          = (flg ? -1 : 0);
1289     break;
1290   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1291     a->nonew          = (flg ? -2 : 0);
1292     break;
1293   case MAT_NEW_DIAGONALS:
1294   case MAT_IGNORE_OFF_PROC_ENTRIES:
1295   case MAT_USE_HASH_TABLE:
1296     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1297     break;
1298   case MAT_SYMMETRIC:
1299   case MAT_STRUCTURALLY_SYMMETRIC:
1300   case MAT_HERMITIAN:
1301   case MAT_SYMMETRY_ETERNAL:
1302     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1303     break;
1304   default:
1305     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1312 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1313 {
1314   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1317   MatScalar      *aa,*aa_i;
1318   PetscScalar    *v_i;
1319 
1320   PetscFunctionBegin;
1321   bs  = A->rmap.bs;
1322   ai  = a->i;
1323   aj  = a->j;
1324   aa  = a->a;
1325   bs2 = a->bs2;
1326 
1327   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1328 
1329   bn  = row/bs;   /* Block number */
1330   bp  = row % bs; /* Block Position */
1331   M   = ai[bn+1] - ai[bn];
1332   *nz = bs*M;
1333 
1334   if (v) {
1335     *v = 0;
1336     if (*nz) {
1337       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1338       for (i=0; i<M; i++) { /* for each block in the block row */
1339         v_i  = *v + i*bs;
1340         aa_i = aa + bs2*(ai[bn] + i);
1341         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1342       }
1343     }
1344   }
1345 
1346   if (idx) {
1347     *idx = 0;
1348     if (*nz) {
1349       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1350       for (i=0; i<M; i++) { /* for each block in the block row */
1351         idx_i = *idx + i*bs;
1352         itmp  = bs*aj[ai[bn] + i];
1353         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1354       }
1355     }
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1362 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1363 {
1364   PetscErrorCode ierr;
1365 
1366   PetscFunctionBegin;
1367   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1368   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1374 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1375 {
1376   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1377   Mat            C;
1378   PetscErrorCode ierr;
1379   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1380   PetscInt       *rows,*cols,bs2=a->bs2;
1381   PetscScalar    *array;
1382 
1383   PetscFunctionBegin;
1384   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1385   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1386   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1387 
1388   array = a->a;
1389   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1390   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1391   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1392   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1393   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1394   ierr = PetscFree(col);CHKERRQ(ierr);
1395   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1396   cols = rows + bs;
1397   for (i=0; i<mbs; i++) {
1398     cols[0] = i*bs;
1399     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1400     len = ai[i+1] - ai[i];
1401     for (j=0; j<len; j++) {
1402       rows[0] = (*aj++)*bs;
1403       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1404       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1405       array += bs2;
1406     }
1407   }
1408   ierr = PetscFree(rows);CHKERRQ(ierr);
1409 
1410   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412 
1413   if (B) {
1414     *B = C;
1415   } else {
1416     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1423 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1424 {
1425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1426   PetscErrorCode ierr;
1427   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1428   int            fd;
1429   PetscScalar    *aa;
1430   FILE           *file;
1431 
1432   PetscFunctionBegin;
1433   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1434   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1435   col_lens[0] = MAT_FILE_COOKIE;
1436 
1437   col_lens[1] = A->rmap.N;
1438   col_lens[2] = A->cmap.n;
1439   col_lens[3] = a->nz*bs2;
1440 
1441   /* store lengths of each row and write (including header) to file */
1442   count = 0;
1443   for (i=0; i<a->mbs; i++) {
1444     for (j=0; j<bs; j++) {
1445       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1446     }
1447   }
1448   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1449   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1450 
1451   /* store column indices (zero start index) */
1452   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1453   count = 0;
1454   for (i=0; i<a->mbs; i++) {
1455     for (j=0; j<bs; j++) {
1456       for (k=a->i[i]; k<a->i[i+1]; k++) {
1457         for (l=0; l<bs; l++) {
1458           jj[count++] = bs*a->j[k] + l;
1459         }
1460       }
1461     }
1462   }
1463   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1464   ierr = PetscFree(jj);CHKERRQ(ierr);
1465 
1466   /* store nonzero values */
1467   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1468   count = 0;
1469   for (i=0; i<a->mbs; i++) {
1470     for (j=0; j<bs; j++) {
1471       for (k=a->i[i]; k<a->i[i+1]; k++) {
1472         for (l=0; l<bs; l++) {
1473           aa[count++] = a->a[bs2*k + l*bs + j];
1474         }
1475       }
1476     }
1477   }
1478   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1479   ierr = PetscFree(aa);CHKERRQ(ierr);
1480 
1481   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1482   if (file) {
1483     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1490 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1491 {
1492   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1493   PetscErrorCode    ierr;
1494   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1495   PetscViewerFormat format;
1496 
1497   PetscFunctionBegin;
1498   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1499   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1500     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1501   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1502     Mat aij;
1503     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1504     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1505     ierr = MatDestroy(aij);CHKERRQ(ierr);
1506   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1507      PetscFunctionReturn(0);
1508   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1509     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1510     for (i=0; i<a->mbs; i++) {
1511       for (j=0; j<bs; j++) {
1512         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1513         for (k=a->i[i]; k<a->i[i+1]; k++) {
1514           for (l=0; l<bs; l++) {
1515 #if defined(PETSC_USE_COMPLEX)
1516             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1517               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1518                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1519             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1520               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1521                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1522             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1523               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1524             }
1525 #else
1526             if (a->a[bs2*k + l*bs + j] != 0.0) {
1527               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1528             }
1529 #endif
1530           }
1531         }
1532         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1533       }
1534     }
1535     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1536   } else {
1537     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1538     for (i=0; i<a->mbs; i++) {
1539       for (j=0; j<bs; j++) {
1540         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1541         for (k=a->i[i]; k<a->i[i+1]; k++) {
1542           for (l=0; l<bs; l++) {
1543 #if defined(PETSC_USE_COMPLEX)
1544             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1546                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1547             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1548               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1549                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1550             } else {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1552             }
1553 #else
1554             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1555 #endif
1556           }
1557         }
1558         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1559       }
1560     }
1561     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1562   }
1563   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1569 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1570 {
1571   Mat            A = (Mat) Aa;
1572   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1573   PetscErrorCode ierr;
1574   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1575   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1576   MatScalar      *aa;
1577   PetscViewer    viewer;
1578 
1579   PetscFunctionBegin;
1580 
1581   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1582   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1583 
1584   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1585 
1586   /* loop over matrix elements drawing boxes */
1587   color = PETSC_DRAW_BLUE;
1588   for (i=0,row=0; i<mbs; i++,row+=bs) {
1589     for (j=a->i[i]; j<a->i[i+1]; j++) {
1590       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1591       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1592       aa = a->a + j*bs2;
1593       for (k=0; k<bs; k++) {
1594         for (l=0; l<bs; l++) {
1595           if (PetscRealPart(*aa++) >=  0.) continue;
1596           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1597         }
1598       }
1599     }
1600   }
1601   color = PETSC_DRAW_CYAN;
1602   for (i=0,row=0; i<mbs; i++,row+=bs) {
1603     for (j=a->i[i]; j<a->i[i+1]; j++) {
1604       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1605       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606       aa = a->a + j*bs2;
1607       for (k=0; k<bs; k++) {
1608         for (l=0; l<bs; l++) {
1609           if (PetscRealPart(*aa++) != 0.) continue;
1610           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611         }
1612       }
1613     }
1614   }
1615 
1616   color = PETSC_DRAW_RED;
1617   for (i=0,row=0; i<mbs; i++,row+=bs) {
1618     for (j=a->i[i]; j<a->i[i+1]; j++) {
1619       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1620       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1621       aa = a->a + j*bs2;
1622       for (k=0; k<bs; k++) {
1623         for (l=0; l<bs; l++) {
1624           if (PetscRealPart(*aa++) <= 0.) continue;
1625           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1626         }
1627       }
1628     }
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1635 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1636 {
1637   PetscErrorCode ierr;
1638   PetscReal      xl,yl,xr,yr,w,h;
1639   PetscDraw      draw;
1640   PetscTruth     isnull;
1641 
1642   PetscFunctionBegin;
1643 
1644   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1645   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1646 
1647   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1648   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1649   xr += w;    yr += h;  xl = -w;     yl = -h;
1650   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1651   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1652   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatView_SeqBAIJ"
1658 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1659 {
1660   PetscErrorCode ierr;
1661   PetscTruth     iascii,isbinary,isdraw;
1662 
1663   PetscFunctionBegin;
1664   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1667   if (iascii){
1668     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1669   } else if (isbinary) {
1670     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1671   } else if (isdraw) {
1672     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1673   } else {
1674     Mat B;
1675     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1676     ierr = MatView(B,viewer);CHKERRQ(ierr);
1677     ierr = MatDestroy(B);CHKERRQ(ierr);
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1685 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1686 {
1687   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1688   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1689   PetscInt    *ai = a->i,*ailen = a->ilen;
1690   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1691   MatScalar   *ap,*aa = a->a;
1692 
1693   PetscFunctionBegin;
1694   for (k=0; k<m; k++) { /* loop over rows */
1695     row  = im[k]; brow = row/bs;
1696     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1697     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1698     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1699     nrow = ailen[brow];
1700     for (l=0; l<n; l++) { /* loop over columns */
1701       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1702       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1703       col  = in[l] ;
1704       bcol = col/bs;
1705       cidx = col%bs;
1706       ridx = row%bs;
1707       high = nrow;
1708       low  = 0; /* assume unsorted */
1709       while (high-low > 5) {
1710         t = (low+high)/2;
1711         if (rp[t] > bcol) high = t;
1712         else             low  = t;
1713       }
1714       for (i=low; i<high; i++) {
1715         if (rp[i] > bcol) break;
1716         if (rp[i] == bcol) {
1717           *v++ = ap[bs2*i+bs*cidx+ridx];
1718           goto finished;
1719         }
1720       }
1721       *v++ = 0.0;
1722       finished:;
1723     }
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 #define CHUNKSIZE 10
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1731 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1732 {
1733   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1734   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1735   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1736   PetscErrorCode  ierr;
1737   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1738   PetscTruth      roworiented=a->roworiented;
1739   const MatScalar *value = v;
1740   MatScalar       *ap,*aa = a->a,*bap;
1741 
1742   PetscFunctionBegin;
1743   if (roworiented) {
1744     stepval = (n-1)*bs;
1745   } else {
1746     stepval = (m-1)*bs;
1747   }
1748   for (k=0; k<m; k++) { /* loop over added rows */
1749     row  = im[k];
1750     if (row < 0) continue;
1751 #if defined(PETSC_USE_DEBUG)
1752     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1753 #endif
1754     rp   = aj + ai[row];
1755     ap   = aa + bs2*ai[row];
1756     rmax = imax[row];
1757     nrow = ailen[row];
1758     low  = 0;
1759     high = nrow;
1760     for (l=0; l<n; l++) { /* loop over added columns */
1761       if (in[l] < 0) continue;
1762 #if defined(PETSC_USE_DEBUG)
1763       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1764 #endif
1765       col = in[l];
1766       if (roworiented) {
1767         value = v + k*(stepval+bs)*bs + l*bs;
1768       } else {
1769         value = v + l*(stepval+bs)*bs + k*bs;
1770       }
1771       if (col <= lastcol) low = 0; else high = nrow;
1772       lastcol = col;
1773       while (high-low > 7) {
1774         t = (low+high)/2;
1775         if (rp[t] > col) high = t;
1776         else             low  = t;
1777       }
1778       for (i=low; i<high; i++) {
1779         if (rp[i] > col) break;
1780         if (rp[i] == col) {
1781           bap  = ap +  bs2*i;
1782           if (roworiented) {
1783             if (is == ADD_VALUES) {
1784               for (ii=0; ii<bs; ii++,value+=stepval) {
1785                 for (jj=ii; jj<bs2; jj+=bs) {
1786                   bap[jj] += *value++;
1787                 }
1788               }
1789             } else {
1790               for (ii=0; ii<bs; ii++,value+=stepval) {
1791                 for (jj=ii; jj<bs2; jj+=bs) {
1792                   bap[jj] = *value++;
1793                 }
1794               }
1795             }
1796           } else {
1797             if (is == ADD_VALUES) {
1798               for (ii=0; ii<bs; ii++,value+=stepval) {
1799                 for (jj=0; jj<bs; jj++) {
1800                   *bap++ += *value++;
1801                 }
1802               }
1803             } else {
1804               for (ii=0; ii<bs; ii++,value+=stepval) {
1805                 for (jj=0; jj<bs; jj++) {
1806                   *bap++  = *value++;
1807                 }
1808               }
1809             }
1810           }
1811           goto noinsert2;
1812         }
1813       }
1814       if (nonew == 1) goto noinsert2;
1815       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1816       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1817       N = nrow++ - 1; high++;
1818       /* shift up all the later entries in this row */
1819       for (ii=N; ii>=i; ii--) {
1820         rp[ii+1] = rp[ii];
1821         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1822       }
1823       if (N >= i) {
1824         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1825       }
1826       rp[i] = col;
1827       bap   = ap +  bs2*i;
1828       if (roworiented) {
1829         for (ii=0; ii<bs; ii++,value+=stepval) {
1830           for (jj=ii; jj<bs2; jj+=bs) {
1831             bap[jj] = *value++;
1832           }
1833         }
1834       } else {
1835         for (ii=0; ii<bs; ii++,value+=stepval) {
1836           for (jj=0; jj<bs; jj++) {
1837             *bap++  = *value++;
1838           }
1839         }
1840       }
1841       noinsert2:;
1842       low = i;
1843     }
1844     ailen[row] = nrow;
1845   }
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1851 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1852 {
1853   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1854   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1855   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1856   PetscErrorCode ierr;
1857   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1858   MatScalar      *aa = a->a,*ap;
1859   PetscReal      ratio=0.6;
1860 
1861   PetscFunctionBegin;
1862   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1863 
1864   if (m) rmax = ailen[0];
1865   for (i=1; i<mbs; i++) {
1866     /* move each row back by the amount of empty slots (fshift) before it*/
1867     fshift += imax[i-1] - ailen[i-1];
1868     rmax   = PetscMax(rmax,ailen[i]);
1869     if (fshift) {
1870       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1871       N = ailen[i];
1872       for (j=0; j<N; j++) {
1873         ip[j-fshift] = ip[j];
1874         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876     }
1877     ai[i] = ai[i-1] + ailen[i-1];
1878   }
1879   if (mbs) {
1880     fshift += imax[mbs-1] - ailen[mbs-1];
1881     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1882   }
1883   /* reset ilen and imax for each row */
1884   for (i=0; i<mbs; i++) {
1885     ailen[i] = imax[i] = ai[i+1] - ai[i];
1886   }
1887   a->nz = ai[mbs];
1888 
1889   /* diagonals may have moved, so kill the diagonal pointers */
1890   a->idiagvalid = PETSC_FALSE;
1891   if (fshift && a->diag) {
1892     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1893     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1894     a->diag = 0;
1895   }
1896   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1897   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1898   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1899   a->reallocs          = 0;
1900   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1901 
1902   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1903   if (a->compressedrow.use){
1904     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1905   }
1906 
1907   A->same_nonzero = PETSC_TRUE;
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 /*
1912    This function returns an array of flags which indicate the locations of contiguous
1913    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1914    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1915    Assume: sizes should be long enough to hold all the values.
1916 */
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1919 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1920 {
1921   PetscInt   i,j,k,row;
1922   PetscTruth flg;
1923 
1924   PetscFunctionBegin;
1925   for (i=0,j=0; i<n; j++) {
1926     row = idx[i];
1927     if (row%bs!=0) { /* Not the begining of a block */
1928       sizes[j] = 1;
1929       i++;
1930     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1931       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1932       i++;
1933     } else { /* Begining of the block, so check if the complete block exists */
1934       flg = PETSC_TRUE;
1935       for (k=1; k<bs; k++) {
1936         if (row+k != idx[i+k]) { /* break in the block */
1937           flg = PETSC_FALSE;
1938           break;
1939         }
1940       }
1941       if (flg) { /* No break in the bs */
1942         sizes[j] = bs;
1943         i+= bs;
1944       } else {
1945         sizes[j] = 1;
1946         i++;
1947       }
1948     }
1949   }
1950   *bs_max = j;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1956 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1957 {
1958   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1959   PetscErrorCode ierr;
1960   PetscInt       i,j,k,count,*rows;
1961   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1962   PetscScalar    zero = 0.0;
1963   MatScalar      *aa;
1964 
1965   PetscFunctionBegin;
1966   /* Make a copy of the IS and  sort it */
1967   /* allocate memory for rows,sizes */
1968   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1969   sizes = rows + is_n;
1970 
1971   /* copy IS values to rows, and sort them */
1972   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1973   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1974   if (baij->keepzeroedrows) {
1975     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1976     bs_max = is_n;
1977     A->same_nonzero = PETSC_TRUE;
1978   } else {
1979     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1980     A->same_nonzero = PETSC_FALSE;
1981   }
1982 
1983   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1984     row   = rows[j];
1985     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1986     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1987     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1988     if (sizes[i] == bs && !baij->keepzeroedrows) {
1989       if (diag != 0.0) {
1990         if (baij->ilen[row/bs] > 0) {
1991           baij->ilen[row/bs]       = 1;
1992           baij->j[baij->i[row/bs]] = row/bs;
1993           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1994         }
1995         /* Now insert all the diagonal values for this bs */
1996         for (k=0; k<bs; k++) {
1997           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1998         }
1999       } else { /* (diag == 0.0) */
2000         baij->ilen[row/bs] = 0;
2001       } /* end (diag == 0.0) */
2002     } else { /* (sizes[i] != bs) */
2003 #if defined (PETSC_USE_DEBUG)
2004       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2005 #endif
2006       for (k=0; k<count; k++) {
2007         aa[0] =  zero;
2008         aa    += bs;
2009       }
2010       if (diag != 0.0) {
2011         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2012       }
2013     }
2014   }
2015 
2016   ierr = PetscFree(rows);CHKERRQ(ierr);
2017   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2023 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2024 {
2025   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2026   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2027   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2028   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2029   PetscErrorCode ierr;
2030   PetscInt       ridx,cidx,bs2=a->bs2;
2031   PetscTruth     roworiented=a->roworiented;
2032   MatScalar      *ap,value,*aa=a->a,*bap;
2033 
2034   PetscFunctionBegin;
2035   for (k=0; k<m; k++) { /* loop over added rows */
2036     row  = im[k];
2037     brow = row/bs;
2038     if (row < 0) continue;
2039 #if defined(PETSC_USE_DEBUG)
2040     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2041 #endif
2042     rp   = aj + ai[brow];
2043     ap   = aa + bs2*ai[brow];
2044     rmax = imax[brow];
2045     nrow = ailen[brow];
2046     low  = 0;
2047     high = nrow;
2048     for (l=0; l<n; l++) { /* loop over added columns */
2049       if (in[l] < 0) continue;
2050 #if defined(PETSC_USE_DEBUG)
2051       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2052 #endif
2053       col = in[l]; bcol = col/bs;
2054       ridx = row % bs; cidx = col % bs;
2055       if (roworiented) {
2056         value = v[l + k*n];
2057       } else {
2058         value = v[k + l*m];
2059       }
2060       if (col <= lastcol) low = 0; else high = nrow;
2061       lastcol = col;
2062       while (high-low > 7) {
2063         t = (low+high)/2;
2064         if (rp[t] > bcol) high = t;
2065         else              low  = t;
2066       }
2067       for (i=low; i<high; i++) {
2068         if (rp[i] > bcol) break;
2069         if (rp[i] == bcol) {
2070           bap  = ap +  bs2*i + bs*cidx + ridx;
2071           if (is == ADD_VALUES) *bap += value;
2072           else                  *bap  = value;
2073           goto noinsert1;
2074         }
2075       }
2076       if (nonew == 1) goto noinsert1;
2077       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2078       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2079       N = nrow++ - 1; high++;
2080       /* shift up all the later entries in this row */
2081       for (ii=N; ii>=i; ii--) {
2082         rp[ii+1] = rp[ii];
2083         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2084       }
2085       if (N>=i) {
2086         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2087       }
2088       rp[i]                      = bcol;
2089       ap[bs2*i + bs*cidx + ridx] = value;
2090       a->nz++;
2091       noinsert1:;
2092       low = i;
2093     }
2094     ailen[brow] = nrow;
2095   }
2096   A->same_nonzero = PETSC_FALSE;
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2103 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2104 {
2105   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2106   Mat            outA;
2107   PetscErrorCode ierr;
2108   PetscTruth     row_identity,col_identity;
2109 
2110   PetscFunctionBegin;
2111   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2112   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2113   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2114   if (!row_identity || !col_identity) {
2115     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2116   }
2117 
2118   outA          = inA;
2119   inA->factor   = FACTOR_LU;
2120 
2121   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2122 
2123   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2124   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2125   a->row = row;
2126   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2127   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2128   a->col = col;
2129 
2130   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2131   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2133 
2134   /*
2135       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2136       for ILU(0) factorization with natural ordering
2137   */
2138   if (inA->rmap.bs < 8) {
2139     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
2140   } else {
2141     if (!a->solve_work) {
2142       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2143       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2144     }
2145   }
2146 
2147   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2148 
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 EXTERN_C_BEGIN
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2155 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2156 {
2157   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2158   PetscInt    i,nz,nbs;
2159 
2160   PetscFunctionBegin;
2161   nz  = baij->maxnz/baij->bs2;
2162   nbs = baij->nbs;
2163   for (i=0; i<nz; i++) {
2164     baij->j[i] = indices[i];
2165   }
2166   baij->nz = nz;
2167   for (i=0; i<nbs; i++) {
2168     baij->ilen[i] = baij->imax[i];
2169   }
2170 
2171   PetscFunctionReturn(0);
2172 }
2173 EXTERN_C_END
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2177 /*@
2178     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2179        in the matrix.
2180 
2181   Input Parameters:
2182 +  mat - the SeqBAIJ matrix
2183 -  indices - the column indices
2184 
2185   Level: advanced
2186 
2187   Notes:
2188     This can be called if you have precomputed the nonzero structure of the
2189   matrix and want to provide it to the matrix object to improve the performance
2190   of the MatSetValues() operation.
2191 
2192     You MUST have set the correct numbers of nonzeros per row in the call to
2193   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2194 
2195     MUST be called before any calls to MatSetValues();
2196 
2197 @*/
2198 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2199 {
2200   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2201 
2202   PetscFunctionBegin;
2203   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2204   PetscValidPointer(indices,2);
2205   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2206   if (f) {
2207     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2208   } else {
2209     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2210   }
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2216 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2217 {
2218   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2219   PetscErrorCode ierr;
2220   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2221   PetscReal      atmp;
2222   PetscScalar    *x,zero = 0.0;
2223   MatScalar      *aa;
2224   PetscInt       ncols,brow,krow,kcol;
2225 
2226   PetscFunctionBegin;
2227   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2228   bs   = A->rmap.bs;
2229   aa   = a->a;
2230   ai   = a->i;
2231   aj   = a->j;
2232   mbs  = a->mbs;
2233 
2234   ierr = VecSet(v,zero);CHKERRQ(ierr);
2235   if (idx) {
2236     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
2237   }
2238   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2239   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2240   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2241   for (i=0; i<mbs; i++) {
2242     ncols = ai[1] - ai[0]; ai++;
2243     brow  = bs*i;
2244     for (j=0; j<ncols; j++){
2245       for (kcol=0; kcol<bs; kcol++){
2246         for (krow=0; krow<bs; krow++){
2247           atmp = PetscAbsScalar(*aa);aa++;
2248           row = brow + krow;    /* row index */
2249           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2250           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2251         }
2252       }
2253       aj++;
2254     }
2255   }
2256   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatCopy_SeqBAIJ"
2262 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2263 {
2264   PetscErrorCode ierr;
2265 
2266   PetscFunctionBegin;
2267   /* If the two matrices have the same copy implementation, use fast copy. */
2268   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2269     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2270     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2271 
2272     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2273       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2274     }
2275     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
2276   } else {
2277     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2278   }
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2284 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2285 {
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2295 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2296 {
2297   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2298   PetscFunctionBegin;
2299   *array = a->a;
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2305 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2306 {
2307   PetscFunctionBegin;
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 #include "petscblaslapack.h"
2312 #undef __FUNCT__
2313 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2314 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2315 {
2316   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2317   PetscErrorCode ierr;
2318   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2319   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2320 
2321   PetscFunctionBegin;
2322   if (str == SAME_NONZERO_PATTERN) {
2323     PetscScalar alpha = a;
2324     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2325   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2326     if (y->xtoy && y->XtoY != X) {
2327       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2328       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2329     }
2330     if (!y->xtoy) { /* get xtoy */
2331       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2332       y->XtoY = X;
2333     }
2334     bs2 = bs*bs;
2335     for (i=0; i<x->nz; i++) {
2336       j = 0;
2337       while (j < bs2){
2338         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2339         j++;
2340       }
2341     }
2342     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2343   } else {
2344     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2345   }
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2351 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2352 {
2353   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2354   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2355   PetscScalar    *aa = a->a;
2356 
2357   PetscFunctionBegin;
2358   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2359   PetscFunctionReturn(0);
2360 }
2361 
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2364 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2365 {
2366   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2367   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2368   PetscScalar    *aa = a->a;
2369 
2370   PetscFunctionBegin;
2371   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 
2376 /* -------------------------------------------------------------------*/
2377 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2378        MatGetRow_SeqBAIJ,
2379        MatRestoreRow_SeqBAIJ,
2380        MatMult_SeqBAIJ_N,
2381 /* 4*/ MatMultAdd_SeqBAIJ_N,
2382        MatMultTranspose_SeqBAIJ,
2383        MatMultTransposeAdd_SeqBAIJ,
2384        MatSolve_SeqBAIJ_N,
2385        0,
2386        0,
2387 /*10*/ 0,
2388        MatLUFactor_SeqBAIJ,
2389        0,
2390        0,
2391        MatTranspose_SeqBAIJ,
2392 /*15*/ MatGetInfo_SeqBAIJ,
2393        MatEqual_SeqBAIJ,
2394        MatGetDiagonal_SeqBAIJ,
2395        MatDiagonalScale_SeqBAIJ,
2396        MatNorm_SeqBAIJ,
2397 /*20*/ 0,
2398        MatAssemblyEnd_SeqBAIJ,
2399        0,
2400        MatSetOption_SeqBAIJ,
2401        MatZeroEntries_SeqBAIJ,
2402 /*25*/ MatZeroRows_SeqBAIJ,
2403        MatLUFactorSymbolic_SeqBAIJ,
2404        MatLUFactorNumeric_SeqBAIJ_N,
2405        MatCholeskyFactorSymbolic_SeqBAIJ,
2406        MatCholeskyFactorNumeric_SeqBAIJ_N,
2407 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2408        MatILUFactorSymbolic_SeqBAIJ,
2409        MatICCFactorSymbolic_SeqBAIJ,
2410        MatGetArray_SeqBAIJ,
2411        MatRestoreArray_SeqBAIJ,
2412 /*35*/ MatDuplicate_SeqBAIJ,
2413        0,
2414        0,
2415        MatILUFactor_SeqBAIJ,
2416        0,
2417 /*40*/ MatAXPY_SeqBAIJ,
2418        MatGetSubMatrices_SeqBAIJ,
2419        MatIncreaseOverlap_SeqBAIJ,
2420        MatGetValues_SeqBAIJ,
2421        MatCopy_SeqBAIJ,
2422 /*45*/ 0,
2423        MatScale_SeqBAIJ,
2424        0,
2425        0,
2426        0,
2427 /*50*/ 0,
2428        MatGetRowIJ_SeqBAIJ,
2429        MatRestoreRowIJ_SeqBAIJ,
2430        0,
2431        0,
2432 /*55*/ 0,
2433        0,
2434        0,
2435        0,
2436        MatSetValuesBlocked_SeqBAIJ,
2437 /*60*/ MatGetSubMatrix_SeqBAIJ,
2438        MatDestroy_SeqBAIJ,
2439        MatView_SeqBAIJ,
2440        0,
2441        0,
2442 /*65*/ 0,
2443        0,
2444        0,
2445        0,
2446        0,
2447 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2448        MatConvert_Basic,
2449        0,
2450        0,
2451        0,
2452 /*75*/ 0,
2453        0,
2454        0,
2455        0,
2456        0,
2457 /*80*/ 0,
2458        0,
2459        0,
2460        0,
2461        MatLoad_SeqBAIJ,
2462 /*85*/ 0,
2463        0,
2464        0,
2465        0,
2466        0,
2467 /*90*/ 0,
2468        0,
2469        0,
2470        0,
2471        0,
2472 /*95*/ 0,
2473        0,
2474        0,
2475        0,
2476        0,
2477 /*100*/0,
2478        0,
2479        0,
2480        0,
2481        0,
2482 /*105*/0,
2483        MatRealPart_SeqBAIJ,
2484        MatImaginaryPart_SeqBAIJ,
2485        0,
2486        0,
2487 /*110*/0,
2488        0,
2489        0,
2490        0,
2491        MatMissingDiagonal_SeqBAIJ
2492 /*115*/
2493 };
2494 
2495 EXTERN_C_BEGIN
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2498 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2499 {
2500   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2501   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2502   PetscErrorCode ierr;
2503 
2504   PetscFunctionBegin;
2505   if (aij->nonew != 1) {
2506     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2507   }
2508 
2509   /* allocate space for values if not already there */
2510   if (!aij->saved_values) {
2511     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2512   }
2513 
2514   /* copy values over */
2515   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 EXTERN_C_END
2519 
2520 EXTERN_C_BEGIN
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2523 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2524 {
2525   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2526   PetscErrorCode ierr;
2527   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2528 
2529   PetscFunctionBegin;
2530   if (aij->nonew != 1) {
2531     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2532   }
2533   if (!aij->saved_values) {
2534     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2535   }
2536 
2537   /* copy values over */
2538   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 EXTERN_C_END
2542 
2543 EXTERN_C_BEGIN
2544 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2545 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2546 EXTERN_C_END
2547 
2548 EXTERN_C_BEGIN
2549 #undef __FUNCT__
2550 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2551 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2552 {
2553   Mat_SeqBAIJ    *b;
2554   PetscErrorCode ierr;
2555   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2556   PetscTruth     flg,skipallocation = PETSC_FALSE;
2557 
2558   PetscFunctionBegin;
2559 
2560   if (nz == MAT_SKIP_ALLOCATION) {
2561     skipallocation = PETSC_TRUE;
2562     nz             = 0;
2563   }
2564 
2565   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2566     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2567   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2568 
2569   if (nnz && newbs != bs) {
2570     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2571   }
2572   bs   = newbs;
2573 
2574   B->rmap.bs = B->cmap.bs = bs;
2575   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2576   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2577 
2578   B->preallocated = PETSC_TRUE;
2579 
2580   mbs  = B->rmap.n/bs;
2581   nbs  = B->cmap.n/bs;
2582   bs2  = bs*bs;
2583 
2584   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2585     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2586   }
2587 
2588   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2589   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2590   if (nnz) {
2591     for (i=0; i<mbs; i++) {
2592       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2593       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2594     }
2595   }
2596 
2597   b       = (Mat_SeqBAIJ*)B->data;
2598   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2599     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2600   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2601 
2602   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2603   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2604   if (!flg) {
2605     switch (bs) {
2606     case 1:
2607       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2608       B->ops->mult            = MatMult_SeqBAIJ_1;
2609       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2610       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2611       break;
2612     case 2:
2613       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2614       B->ops->mult            = MatMult_SeqBAIJ_2;
2615       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2616       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2617       break;
2618     case 3:
2619       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2620       B->ops->mult            = MatMult_SeqBAIJ_3;
2621       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2622       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2623       break;
2624     case 4:
2625       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2626       B->ops->mult            = MatMult_SeqBAIJ_4;
2627       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2628       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2629       break;
2630     case 5:
2631       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2632       B->ops->mult            = MatMult_SeqBAIJ_5;
2633       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2634       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2635       break;
2636     case 6:
2637       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2638       B->ops->mult            = MatMult_SeqBAIJ_6;
2639       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2640       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2641       break;
2642     case 7:
2643       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2644       B->ops->mult            = MatMult_SeqBAIJ_7;
2645       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2646       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2647       break;
2648     default:
2649       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2650       B->ops->mult            = MatMult_SeqBAIJ_N;
2651       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2652       break;
2653     }
2654   }
2655   B->rmap.bs      = bs;
2656   b->mbs     = mbs;
2657   b->nbs     = nbs;
2658   if (!skipallocation) {
2659     if (!b->imax) {
2660       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2661     }
2662     /* b->ilen will count nonzeros in each block row so far. */
2663     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2664     if (!nnz) {
2665       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2666       else if (nz <= 0)        nz = 1;
2667       for (i=0; i<mbs; i++) b->imax[i] = nz;
2668       nz = nz*mbs;
2669     } else {
2670       nz = 0;
2671       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2672     }
2673 
2674     /* allocate the matrix space */
2675     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2676     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2677     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2678     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2679     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2680     b->singlemalloc = PETSC_TRUE;
2681     b->i[0] = 0;
2682     for (i=1; i<mbs+1; i++) {
2683       b->i[i] = b->i[i-1] + b->imax[i-1];
2684     }
2685     b->free_a     = PETSC_TRUE;
2686     b->free_ij    = PETSC_TRUE;
2687   } else {
2688     b->free_a     = PETSC_FALSE;
2689     b->free_ij    = PETSC_FALSE;
2690   }
2691 
2692   B->rmap.bs          = bs;
2693   b->bs2              = bs2;
2694   b->mbs              = mbs;
2695   b->nz               = 0;
2696   b->maxnz            = nz*bs2;
2697   B->info.nz_unneeded = (PetscReal)b->maxnz;
2698   PetscFunctionReturn(0);
2699 }
2700 EXTERN_C_END
2701 
2702 /*MC
2703    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2704    block sparse compressed row format.
2705 
2706    Options Database Keys:
2707 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2708 
2709   Level: beginner
2710 
2711 .seealso: MatCreateSeqBAIJ()
2712 M*/
2713 
2714 EXTERN_C_BEGIN
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatCreate_SeqBAIJ"
2717 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2718 {
2719   PetscErrorCode ierr;
2720   PetscMPIInt    size;
2721   Mat_SeqBAIJ    *b;
2722 
2723   PetscFunctionBegin;
2724   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2725   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2726 
2727   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2728   B->data = (void*)b;
2729   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2730   B->factor           = 0;
2731   B->mapping          = 0;
2732   b->row              = 0;
2733   b->col              = 0;
2734   b->icol             = 0;
2735   b->reallocs         = 0;
2736   b->saved_values     = 0;
2737 
2738   b->roworiented      = PETSC_TRUE;
2739   b->nonew            = 0;
2740   b->diag             = 0;
2741   b->solve_work       = 0;
2742   b->mult_work        = 0;
2743   B->spptr            = 0;
2744   B->info.nz_unneeded = (PetscReal)b->maxnz;
2745   b->keepzeroedrows   = PETSC_FALSE;
2746   b->xtoy              = 0;
2747   b->XtoY              = 0;
2748   b->compressedrow.use     = PETSC_FALSE;
2749   b->compressedrow.nrows   = 0;
2750   b->compressedrow.i       = PETSC_NULL;
2751   b->compressedrow.rindex  = PETSC_NULL;
2752   b->compressedrow.checked = PETSC_FALSE;
2753   B->same_nonzero          = PETSC_FALSE;
2754 
2755   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2756                                      "MatInvertBlockDiagonal_SeqBAIJ",
2757                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2758   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2759                                      "MatStoreValues_SeqBAIJ",
2760                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2761   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2762                                      "MatRetrieveValues_SeqBAIJ",
2763                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2764   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2765                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2766                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2767   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2768                                      "MatConvert_SeqBAIJ_SeqAIJ",
2769                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2770   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2771                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2772                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2773   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2774                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2775                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2776   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 EXTERN_C_END
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2783 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2784 {
2785   Mat            C;
2786   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2787   PetscErrorCode ierr;
2788   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2789 
2790   PetscFunctionBegin;
2791   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2792 
2793   *B = 0;
2794   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2795   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2796   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2797   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2798   c    = (Mat_SeqBAIJ*)C->data;
2799 
2800   C->rmap.N   = A->rmap.N;
2801   C->cmap.N   = A->cmap.N;
2802   C->rmap.bs  = A->rmap.bs;
2803   c->bs2 = a->bs2;
2804   c->mbs = a->mbs;
2805   c->nbs = a->nbs;
2806 
2807   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2808   for (i=0; i<mbs; i++) {
2809     c->imax[i] = a->imax[i];
2810     c->ilen[i] = a->ilen[i];
2811   }
2812 
2813   /* allocate the matrix space */
2814   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2815   c->singlemalloc = PETSC_TRUE;
2816   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2817   if (mbs > 0) {
2818     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2819     if (cpvalues == MAT_COPY_VALUES) {
2820       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2821     } else {
2822       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2823     }
2824   }
2825   c->roworiented = a->roworiented;
2826   c->nonew       = a->nonew;
2827 
2828   if (a->diag) {
2829     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2830     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2831     for (i=0; i<mbs; i++) {
2832       c->diag[i] = a->diag[i];
2833     }
2834   } else c->diag        = 0;
2835   c->nz                 = a->nz;
2836   c->maxnz              = a->maxnz;
2837   c->solve_work         = 0;
2838   c->mult_work          = 0;
2839   c->free_a             = PETSC_TRUE;
2840   c->free_ij            = PETSC_TRUE;
2841   C->preallocated       = PETSC_TRUE;
2842   C->assembled          = PETSC_TRUE;
2843 
2844   c->compressedrow.use     = a->compressedrow.use;
2845   c->compressedrow.nrows   = a->compressedrow.nrows;
2846   c->compressedrow.checked = a->compressedrow.checked;
2847   if ( a->compressedrow.checked && a->compressedrow.use){
2848     i = a->compressedrow.nrows;
2849     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2850     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2851     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2852     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2853   } else {
2854     c->compressedrow.use    = PETSC_FALSE;
2855     c->compressedrow.i      = PETSC_NULL;
2856     c->compressedrow.rindex = PETSC_NULL;
2857   }
2858   C->same_nonzero = A->same_nonzero;
2859   *B = C;
2860   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatLoad_SeqBAIJ"
2866 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2867 {
2868   Mat_SeqBAIJ    *a;
2869   Mat            B;
2870   PetscErrorCode ierr;
2871   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2872   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2873   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2874   PetscInt       *masked,nmask,tmp,bs2,ishift;
2875   PetscMPIInt    size;
2876   int            fd;
2877   PetscScalar    *aa;
2878   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2879 
2880   PetscFunctionBegin;
2881   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2882     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2883   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2884   bs2  = bs*bs;
2885 
2886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2887   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2888   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2889   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2890   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2891   M = header[1]; N = header[2]; nz = header[3];
2892 
2893   if (header[3] < 0) {
2894     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2895   }
2896 
2897   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2898 
2899   /*
2900      This code adds extra rows to make sure the number of rows is
2901     divisible by the blocksize
2902   */
2903   mbs        = M/bs;
2904   extra_rows = bs - M + bs*(mbs);
2905   if (extra_rows == bs) extra_rows = 0;
2906   else                  mbs++;
2907   if (extra_rows) {
2908     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2909   }
2910 
2911   /* read in row lengths */
2912   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2913   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2914   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2915 
2916   /* read in column indices */
2917   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2918   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2919   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2920 
2921   /* loop over row lengths determining block row lengths */
2922   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2923   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2924   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2925   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2926   masked   = mask + mbs;
2927   rowcount = 0; nzcount = 0;
2928   for (i=0; i<mbs; i++) {
2929     nmask = 0;
2930     for (j=0; j<bs; j++) {
2931       kmax = rowlengths[rowcount];
2932       for (k=0; k<kmax; k++) {
2933         tmp = jj[nzcount++]/bs;
2934         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2935       }
2936       rowcount++;
2937     }
2938     browlengths[i] += nmask;
2939     /* zero out the mask elements we set */
2940     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2941   }
2942 
2943   /* create our matrix */
2944   ierr = MatCreate(comm,&B);
2945   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2946   ierr = MatSetType(B,type);CHKERRQ(ierr);
2947   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2948   a = (Mat_SeqBAIJ*)B->data;
2949 
2950   /* set matrix "i" values */
2951   a->i[0] = 0;
2952   for (i=1; i<= mbs; i++) {
2953     a->i[i]      = a->i[i-1] + browlengths[i-1];
2954     a->ilen[i-1] = browlengths[i-1];
2955   }
2956   a->nz         = 0;
2957   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2958 
2959   /* read in nonzero values */
2960   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2961   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2962   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2963 
2964   /* set "a" and "j" values into matrix */
2965   nzcount = 0; jcount = 0;
2966   for (i=0; i<mbs; i++) {
2967     nzcountb = nzcount;
2968     nmask    = 0;
2969     for (j=0; j<bs; j++) {
2970       kmax = rowlengths[i*bs+j];
2971       for (k=0; k<kmax; k++) {
2972         tmp = jj[nzcount++]/bs;
2973 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2974       }
2975     }
2976     /* sort the masked values */
2977     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2978 
2979     /* set "j" values into matrix */
2980     maskcount = 1;
2981     for (j=0; j<nmask; j++) {
2982       a->j[jcount++]  = masked[j];
2983       mask[masked[j]] = maskcount++;
2984     }
2985     /* set "a" values into matrix */
2986     ishift = bs2*a->i[i];
2987     for (j=0; j<bs; j++) {
2988       kmax = rowlengths[i*bs+j];
2989       for (k=0; k<kmax; k++) {
2990         tmp       = jj[nzcountb]/bs ;
2991         block     = mask[tmp] - 1;
2992         point     = jj[nzcountb] - bs*tmp;
2993         idx       = ishift + bs2*block + j + bs*point;
2994         a->a[idx] = (MatScalar)aa[nzcountb++];
2995       }
2996     }
2997     /* zero out the mask elements we set */
2998     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2999   }
3000   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3001 
3002   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3003   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3004   ierr = PetscFree(aa);CHKERRQ(ierr);
3005   ierr = PetscFree(jj);CHKERRQ(ierr);
3006   ierr = PetscFree(mask);CHKERRQ(ierr);
3007 
3008   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3009   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3010   ierr = MatView_Private(B);CHKERRQ(ierr);
3011 
3012   *A = B;
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatCreateSeqBAIJ"
3018 /*@C
3019    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3020    compressed row) format.  For good matrix assembly performance the
3021    user should preallocate the matrix storage by setting the parameter nz
3022    (or the array nnz).  By setting these parameters accurately, performance
3023    during matrix assembly can be increased by more than a factor of 50.
3024 
3025    Collective on MPI_Comm
3026 
3027    Input Parameters:
3028 +  comm - MPI communicator, set to PETSC_COMM_SELF
3029 .  bs - size of block
3030 .  m - number of rows
3031 .  n - number of columns
3032 .  nz - number of nonzero blocks  per block row (same for all rows)
3033 -  nnz - array containing the number of nonzero blocks in the various block rows
3034          (possibly different for each block row) or PETSC_NULL
3035 
3036    Output Parameter:
3037 .  A - the matrix
3038 
3039    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3040    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3041    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3042    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3043 
3044    Options Database Keys:
3045 .   -mat_no_unroll - uses code that does not unroll the loops in the
3046                      block calculations (much slower)
3047 .    -mat_block_size - size of the blocks to use
3048 
3049    Level: intermediate
3050 
3051    Notes:
3052    The number of rows and columns must be divisible by blocksize.
3053 
3054    If the nnz parameter is given then the nz parameter is ignored
3055 
3056    A nonzero block is any block that as 1 or more nonzeros in it
3057 
3058    The block AIJ format is fully compatible with standard Fortran 77
3059    storage.  That is, the stored row and column indices can begin at
3060    either one (as in Fortran) or zero.  See the users' manual for details.
3061 
3062    Specify the preallocated storage with either nz or nnz (not both).
3063    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3064    allocation.  For additional details, see the users manual chapter on
3065    matrices.
3066 
3067 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3068 @*/
3069 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3070 {
3071   PetscErrorCode ierr;
3072 
3073   PetscFunctionBegin;
3074   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3075   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3076   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3077   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 #undef __FUNCT__
3082 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3083 /*@C
3084    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3085    per row in the matrix. For good matrix assembly performance the
3086    user should preallocate the matrix storage by setting the parameter nz
3087    (or the array nnz).  By setting these parameters accurately, performance
3088    during matrix assembly can be increased by more than a factor of 50.
3089 
3090    Collective on MPI_Comm
3091 
3092    Input Parameters:
3093 +  A - the matrix
3094 .  bs - size of block
3095 .  nz - number of block nonzeros per block row (same for all rows)
3096 -  nnz - array containing the number of block nonzeros in the various block rows
3097          (possibly different for each block row) or PETSC_NULL
3098 
3099    Options Database Keys:
3100 .   -mat_no_unroll - uses code that does not unroll the loops in the
3101                      block calculations (much slower)
3102 .    -mat_block_size - size of the blocks to use
3103 
3104    Level: intermediate
3105 
3106    Notes:
3107    If the nnz parameter is given then the nz parameter is ignored
3108 
3109    You can call MatGetInfo() to get information on how effective the preallocation was;
3110    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3111    You can also run with the option -info and look for messages with the string
3112    malloc in them to see if additional memory allocation was needed.
3113 
3114    The block AIJ format is fully compatible with standard Fortran 77
3115    storage.  That is, the stored row and column indices can begin at
3116    either one (as in Fortran) or zero.  See the users' manual for details.
3117 
3118    Specify the preallocated storage with either nz or nnz (not both).
3119    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3120    allocation.  For additional details, see the users manual chapter on
3121    matrices.
3122 
3123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3124 @*/
3125 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3126 {
3127   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3128 
3129   PetscFunctionBegin;
3130   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3131   if (f) {
3132     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3133   }
3134   PetscFunctionReturn(0);
3135 }
3136 
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3139 /*@
3140      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3141               (upper triangular entries in CSR format) provided by the user.
3142 
3143      Collective on MPI_Comm
3144 
3145    Input Parameters:
3146 +  comm - must be an MPI communicator of size 1
3147 .  bs - size of block
3148 .  m - number of rows
3149 .  n - number of columns
3150 .  i - row indices
3151 .  j - column indices
3152 -  a - matrix values
3153 
3154    Output Parameter:
3155 .  mat - the matrix
3156 
3157    Level: intermediate
3158 
3159    Notes:
3160        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3161     once the matrix is destroyed
3162 
3163        You cannot set new nonzero locations into this matrix, that will generate an error.
3164 
3165        The i and j indices are 0 based
3166 
3167 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3168 
3169 @*/
3170 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3171 {
3172   PetscErrorCode ierr;
3173   PetscInt       ii;
3174   Mat_SeqBAIJ    *baij;
3175 
3176   PetscFunctionBegin;
3177   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3178   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3179 
3180   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3181   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3182   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3183   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3184   baij = (Mat_SeqBAIJ*)(*mat)->data;
3185   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3186 
3187   baij->i = i;
3188   baij->j = j;
3189   baij->a = a;
3190   baij->singlemalloc = PETSC_FALSE;
3191   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3192   baij->free_a       = PETSC_FALSE;
3193   baij->free_ij       = PETSC_FALSE;
3194 
3195   for (ii=0; ii<m; ii++) {
3196     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3197 #if defined(PETSC_USE_DEBUG)
3198     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3199 #endif
3200   }
3201 #if defined(PETSC_USE_DEBUG)
3202   for (ii=0; ii<baij->i[m]; ii++) {
3203     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3204     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3205   }
3206 #endif
3207 
3208   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3209   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3210   PetscFunctionReturn(0);
3211 }
3212