xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5c9eb25f753630cfd361293e05e7358a00a954ac)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155   } else {
1156     tia = a->i; tja = a->j;
1157   }
1158 
1159   if (!blockcompressed && bs > 1) {
1160     (*nn) *= bs;
1161     nbs    = bs;
1162     /* malloc & create the natural set of indices */
1163     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1164     if (n) {
1165       (*ia)[0] = 0;
1166       for (j=1; j<bs; j++) {
1167         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168       }
1169     }
1170 
1171     for (i=1; i<n; i++) {
1172       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173       for (j=1; j<bs; j++) {
1174         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175       }
1176     }
1177     if (n) {
1178       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179     }
1180 
1181     if (ja) {
1182       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1183       cnt = 0;
1184       for (i=0; i<n; i++) {
1185         for (j=0; j<bs; j++) {
1186           for (k=tia[i]; k<tia[i+1]; k++) {
1187             for (l=0; l<bs; l++) {
1188               (*ja)[cnt++] = bs*tja[k] + l;
1189 	    }
1190           }
1191         }
1192       }
1193     }
1194 
1195     n     *= bs;
1196     nz *= bs*bs;
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       ierr = PetscFree(tia);CHKERRQ(ierr);
1199       ierr = PetscFree(tja);CHKERRQ(ierr);
1200     }
1201   } else {
1202     *ia = tia;
1203     if (ja) *ja = tja;
1204   }
1205   if (oshift == 1) {
1206     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       i,n = a->mbs,nz = a->i[n];
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if (!blockcompressed && A->rmap.bs > 1) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   } else if (symmetric) {
1226     ierr = PetscFree(*ia);CHKERRQ(ierr);
1227     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1228   } else if (oshift == 1) { /* blockcompressed */
1229     for (i=0; i<n+1; i++) a->i[i]--;
1230     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243 #if defined(PETSC_USE_LOG)
1244   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1245 #endif
1246   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1247   if (a->row) {
1248     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1249   }
1250   if (a->col) {
1251     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1254   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1255   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1256   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1257   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1258   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1259   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1260   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1261   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1262 
1263   ierr = PetscFree(a);CHKERRQ(ierr);
1264 
1265   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1266   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1278 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284   switch (op) {
1285   case MAT_ROW_ORIENTED:
1286     a->roworiented    = flg;
1287     break;
1288   case MAT_KEEP_ZEROED_ROWS:
1289     a->keepzeroedrows = flg;
1290     break;
1291   case MAT_NEW_NONZERO_LOCATIONS:
1292     a->nonew          = (flg ? 0 : 1);
1293     break;
1294   case MAT_NEW_NONZERO_LOCATION_ERR:
1295     a->nonew          = (flg ? -1 : 0);
1296     break;
1297   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1298     a->nonew          = (flg ? -2 : 0);
1299     break;
1300   case MAT_NEW_DIAGONALS:
1301   case MAT_IGNORE_OFF_PROC_ENTRIES:
1302   case MAT_USE_HASH_TABLE:
1303     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1304     break;
1305   case MAT_SYMMETRIC:
1306   case MAT_STRUCTURALLY_SYMMETRIC:
1307   case MAT_HERMITIAN:
1308   case MAT_SYMMETRY_ETERNAL:
1309     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1310     break;
1311   default:
1312     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1313   }
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1319 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1320 {
1321   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1324   MatScalar      *aa,*aa_i;
1325   PetscScalar    *v_i;
1326 
1327   PetscFunctionBegin;
1328   bs  = A->rmap.bs;
1329   ai  = a->i;
1330   aj  = a->j;
1331   aa  = a->a;
1332   bs2 = a->bs2;
1333 
1334   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1335 
1336   bn  = row/bs;   /* Block number */
1337   bp  = row % bs; /* Block Position */
1338   M   = ai[bn+1] - ai[bn];
1339   *nz = bs*M;
1340 
1341   if (v) {
1342     *v = 0;
1343     if (*nz) {
1344       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1345       for (i=0; i<M; i++) { /* for each block in the block row */
1346         v_i  = *v + i*bs;
1347         aa_i = aa + bs2*(ai[bn] + i);
1348         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1349       }
1350     }
1351   }
1352 
1353   if (idx) {
1354     *idx = 0;
1355     if (*nz) {
1356       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1357       for (i=0; i<M; i++) { /* for each block in the block row */
1358         idx_i = *idx + i*bs;
1359         itmp  = bs*aj[ai[bn] + i];
1360         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1361       }
1362     }
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1369 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1370 {
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1375   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1381 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1382 {
1383   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1384   Mat            C;
1385   PetscErrorCode ierr;
1386   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1387   PetscInt       *rows,*cols,bs2=a->bs2;
1388   MatScalar      *array;
1389 
1390   PetscFunctionBegin;
1391   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1392   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1393     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1394     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1395 
1396     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1397     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1398     ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1399     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1400     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1401     ierr = PetscFree(col);CHKERRQ(ierr);
1402   } else {
1403     C = *B;
1404   }
1405 
1406   array = a->a;
1407   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1408   cols = rows + bs;
1409   for (i=0; i<mbs; i++) {
1410     cols[0] = i*bs;
1411     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1412     len = ai[i+1] - ai[i];
1413     for (j=0; j<len; j++) {
1414       rows[0] = (*aj++)*bs;
1415       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1416       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1417       array += bs2;
1418     }
1419   }
1420   ierr = PetscFree(rows);CHKERRQ(ierr);
1421 
1422   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1423   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1424 
1425   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1426     *B = C;
1427   } else {
1428     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1435 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1436 {
1437   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1438   PetscErrorCode ierr;
1439   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1440   int            fd;
1441   PetscScalar    *aa;
1442   FILE           *file;
1443 
1444   PetscFunctionBegin;
1445   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1446   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1447   col_lens[0] = MAT_FILE_COOKIE;
1448 
1449   col_lens[1] = A->rmap.N;
1450   col_lens[2] = A->cmap.n;
1451   col_lens[3] = a->nz*bs2;
1452 
1453   /* store lengths of each row and write (including header) to file */
1454   count = 0;
1455   for (i=0; i<a->mbs; i++) {
1456     for (j=0; j<bs; j++) {
1457       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1458     }
1459   }
1460   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1461   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1462 
1463   /* store column indices (zero start index) */
1464   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1465   count = 0;
1466   for (i=0; i<a->mbs; i++) {
1467     for (j=0; j<bs; j++) {
1468       for (k=a->i[i]; k<a->i[i+1]; k++) {
1469         for (l=0; l<bs; l++) {
1470           jj[count++] = bs*a->j[k] + l;
1471         }
1472       }
1473     }
1474   }
1475   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1476   ierr = PetscFree(jj);CHKERRQ(ierr);
1477 
1478   /* store nonzero values */
1479   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1480   count = 0;
1481   for (i=0; i<a->mbs; i++) {
1482     for (j=0; j<bs; j++) {
1483       for (k=a->i[i]; k<a->i[i+1]; k++) {
1484         for (l=0; l<bs; l++) {
1485           aa[count++] = a->a[bs2*k + l*bs + j];
1486         }
1487       }
1488     }
1489   }
1490   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1491   ierr = PetscFree(aa);CHKERRQ(ierr);
1492 
1493   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1494   if (file) {
1495     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1496   }
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1502 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1503 {
1504   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1505   PetscErrorCode    ierr;
1506   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1507   PetscViewerFormat format;
1508 
1509   PetscFunctionBegin;
1510   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1511   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1512     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1513   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1514     Mat aij;
1515     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1516     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1517     ierr = MatDestroy(aij);CHKERRQ(ierr);
1518   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1519      PetscFunctionReturn(0);
1520   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1521     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1522     for (i=0; i<a->mbs; i++) {
1523       for (j=0; j<bs; j++) {
1524         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1525         for (k=a->i[i]; k<a->i[i+1]; k++) {
1526           for (l=0; l<bs; l++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1529               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1530                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1531             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1532               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1533                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1534             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             }
1537 #else
1538             if (a->a[bs2*k + l*bs + j] != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1540             }
1541 #endif
1542           }
1543         }
1544         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1545       }
1546     }
1547     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1548   } else {
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1550     for (i=0; i<a->mbs; i++) {
1551       for (j=0; j<bs; j++) {
1552         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1553         for (k=a->i[i]; k<a->i[i+1]; k++) {
1554           for (l=0; l<bs; l++) {
1555 #if defined(PETSC_USE_COMPLEX)
1556             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1557               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1558                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1559             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1560               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1561                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1562             } else {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             }
1565 #else
1566             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1567 #endif
1568           }
1569         }
1570         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1571       }
1572     }
1573     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1574   }
1575   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1582 {
1583   Mat            A = (Mat) Aa;
1584   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1585   PetscErrorCode ierr;
1586   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1587   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1588   MatScalar      *aa;
1589   PetscViewer    viewer;
1590 
1591   PetscFunctionBegin;
1592 
1593   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1594   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1595 
1596   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1597 
1598   /* loop over matrix elements drawing boxes */
1599   color = PETSC_DRAW_BLUE;
1600   for (i=0,row=0; i<mbs; i++,row+=bs) {
1601     for (j=a->i[i]; j<a->i[i+1]; j++) {
1602       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1603       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1604       aa = a->a + j*bs2;
1605       for (k=0; k<bs; k++) {
1606         for (l=0; l<bs; l++) {
1607           if (PetscRealPart(*aa++) >=  0.) continue;
1608           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1609         }
1610       }
1611     }
1612   }
1613   color = PETSC_DRAW_CYAN;
1614   for (i=0,row=0; i<mbs; i++,row+=bs) {
1615     for (j=a->i[i]; j<a->i[i+1]; j++) {
1616       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1617       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1618       aa = a->a + j*bs2;
1619       for (k=0; k<bs; k++) {
1620         for (l=0; l<bs; l++) {
1621           if (PetscRealPart(*aa++) != 0.) continue;
1622           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1623         }
1624       }
1625     }
1626   }
1627 
1628   color = PETSC_DRAW_RED;
1629   for (i=0,row=0; i<mbs; i++,row+=bs) {
1630     for (j=a->i[i]; j<a->i[i+1]; j++) {
1631       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1632       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1633       aa = a->a + j*bs2;
1634       for (k=0; k<bs; k++) {
1635         for (l=0; l<bs; l++) {
1636           if (PetscRealPart(*aa++) <= 0.) continue;
1637           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1638         }
1639       }
1640     }
1641   }
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1647 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1648 {
1649   PetscErrorCode ierr;
1650   PetscReal      xl,yl,xr,yr,w,h;
1651   PetscDraw      draw;
1652   PetscTruth     isnull;
1653 
1654   PetscFunctionBegin;
1655 
1656   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1657   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1658 
1659   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1660   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1661   xr += w;    yr += h;  xl = -w;     yl = -h;
1662   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1663   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1664   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatView_SeqBAIJ"
1670 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1671 {
1672   PetscErrorCode ierr;
1673   PetscTruth     iascii,isbinary,isdraw;
1674 
1675   PetscFunctionBegin;
1676   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1677   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1678   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1679   if (iascii){
1680     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1681   } else if (isbinary) {
1682     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1683   } else if (isdraw) {
1684     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1685   } else {
1686     Mat B;
1687     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1688     ierr = MatView(B,viewer);CHKERRQ(ierr);
1689     ierr = MatDestroy(B);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1697 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1698 {
1699   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1700   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1701   PetscInt    *ai = a->i,*ailen = a->ilen;
1702   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1703   MatScalar   *ap,*aa = a->a;
1704 
1705   PetscFunctionBegin;
1706   for (k=0; k<m; k++) { /* loop over rows */
1707     row  = im[k]; brow = row/bs;
1708     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1709     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1710     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1711     nrow = ailen[brow];
1712     for (l=0; l<n; l++) { /* loop over columns */
1713       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1714       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1715       col  = in[l] ;
1716       bcol = col/bs;
1717       cidx = col%bs;
1718       ridx = row%bs;
1719       high = nrow;
1720       low  = 0; /* assume unsorted */
1721       while (high-low > 5) {
1722         t = (low+high)/2;
1723         if (rp[t] > bcol) high = t;
1724         else             low  = t;
1725       }
1726       for (i=low; i<high; i++) {
1727         if (rp[i] > bcol) break;
1728         if (rp[i] == bcol) {
1729           *v++ = ap[bs2*i+bs*cidx+ridx];
1730           goto finished;
1731         }
1732       }
1733       *v++ = 0.0;
1734       finished:;
1735     }
1736   }
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #define CHUNKSIZE 10
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1743 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1744 {
1745   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1746   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1747   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1748   PetscErrorCode    ierr;
1749   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1750   PetscTruth        roworiented=a->roworiented;
1751   const PetscScalar *value = v;
1752   MatScalar         *ap,*aa = a->a,*bap;
1753 
1754   PetscFunctionBegin;
1755   if (roworiented) {
1756     stepval = (n-1)*bs;
1757   } else {
1758     stepval = (m-1)*bs;
1759   }
1760   for (k=0; k<m; k++) { /* loop over added rows */
1761     row  = im[k];
1762     if (row < 0) continue;
1763 #if defined(PETSC_USE_DEBUG)
1764     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1765 #endif
1766     rp   = aj + ai[row];
1767     ap   = aa + bs2*ai[row];
1768     rmax = imax[row];
1769     nrow = ailen[row];
1770     low  = 0;
1771     high = nrow;
1772     for (l=0; l<n; l++) { /* loop over added columns */
1773       if (in[l] < 0) continue;
1774 #if defined(PETSC_USE_DEBUG)
1775       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1776 #endif
1777       col = in[l];
1778       if (roworiented) {
1779         value = v + k*(stepval+bs)*bs + l*bs;
1780       } else {
1781         value = v + l*(stepval+bs)*bs + k*bs;
1782       }
1783       if (col <= lastcol) low = 0; else high = nrow;
1784       lastcol = col;
1785       while (high-low > 7) {
1786         t = (low+high)/2;
1787         if (rp[t] > col) high = t;
1788         else             low  = t;
1789       }
1790       for (i=low; i<high; i++) {
1791         if (rp[i] > col) break;
1792         if (rp[i] == col) {
1793           bap  = ap +  bs2*i;
1794           if (roworiented) {
1795             if (is == ADD_VALUES) {
1796               for (ii=0; ii<bs; ii++,value+=stepval) {
1797                 for (jj=ii; jj<bs2; jj+=bs) {
1798                   bap[jj] += *value++;
1799                 }
1800               }
1801             } else {
1802               for (ii=0; ii<bs; ii++,value+=stepval) {
1803                 for (jj=ii; jj<bs2; jj+=bs) {
1804                   bap[jj] = *value++;
1805                 }
1806               }
1807             }
1808           } else {
1809             if (is == ADD_VALUES) {
1810               for (ii=0; ii<bs; ii++,value+=stepval) {
1811                 for (jj=0; jj<bs; jj++) {
1812                   *bap++ += *value++;
1813                 }
1814               }
1815             } else {
1816               for (ii=0; ii<bs; ii++,value+=stepval) {
1817                 for (jj=0; jj<bs; jj++) {
1818                   *bap++  = *value++;
1819                 }
1820               }
1821             }
1822           }
1823           goto noinsert2;
1824         }
1825       }
1826       if (nonew == 1) goto noinsert2;
1827       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1828       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1829       N = nrow++ - 1; high++;
1830       /* shift up all the later entries in this row */
1831       for (ii=N; ii>=i; ii--) {
1832         rp[ii+1] = rp[ii];
1833         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1834       }
1835       if (N >= i) {
1836         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1837       }
1838       rp[i] = col;
1839       bap   = ap +  bs2*i;
1840       if (roworiented) {
1841         for (ii=0; ii<bs; ii++,value+=stepval) {
1842           for (jj=ii; jj<bs2; jj+=bs) {
1843             bap[jj] = *value++;
1844           }
1845         }
1846       } else {
1847         for (ii=0; ii<bs; ii++,value+=stepval) {
1848           for (jj=0; jj<bs; jj++) {
1849             *bap++  = *value++;
1850           }
1851         }
1852       }
1853       noinsert2:;
1854       low = i;
1855     }
1856     ailen[row] = nrow;
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1863 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1864 {
1865   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1866   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1867   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1868   PetscErrorCode ierr;
1869   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1870   MatScalar      *aa = a->a,*ap;
1871   PetscReal      ratio=0.6;
1872 
1873   PetscFunctionBegin;
1874   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1875 
1876   if (m) rmax = ailen[0];
1877   for (i=1; i<mbs; i++) {
1878     /* move each row back by the amount of empty slots (fshift) before it*/
1879     fshift += imax[i-1] - ailen[i-1];
1880     rmax   = PetscMax(rmax,ailen[i]);
1881     if (fshift) {
1882       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1883       N = ailen[i];
1884       for (j=0; j<N; j++) {
1885         ip[j-fshift] = ip[j];
1886         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1887       }
1888     }
1889     ai[i] = ai[i-1] + ailen[i-1];
1890   }
1891   if (mbs) {
1892     fshift += imax[mbs-1] - ailen[mbs-1];
1893     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1894   }
1895   /* reset ilen and imax for each row */
1896   for (i=0; i<mbs; i++) {
1897     ailen[i] = imax[i] = ai[i+1] - ai[i];
1898   }
1899   a->nz = ai[mbs];
1900 
1901   /* diagonals may have moved, so kill the diagonal pointers */
1902   a->idiagvalid = PETSC_FALSE;
1903   if (fshift && a->diag) {
1904     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1905     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1906     a->diag = 0;
1907   }
1908   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1909   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1910   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1911   a->reallocs          = 0;
1912   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1913 
1914   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1915   if (a->compressedrow.use){
1916     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1917   }
1918 
1919   A->same_nonzero = PETSC_TRUE;
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 /*
1924    This function returns an array of flags which indicate the locations of contiguous
1925    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1926    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1927    Assume: sizes should be long enough to hold all the values.
1928 */
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1931 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1932 {
1933   PetscInt   i,j,k,row;
1934   PetscTruth flg;
1935 
1936   PetscFunctionBegin;
1937   for (i=0,j=0; i<n; j++) {
1938     row = idx[i];
1939     if (row%bs!=0) { /* Not the begining of a block */
1940       sizes[j] = 1;
1941       i++;
1942     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1943       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1944       i++;
1945     } else { /* Begining of the block, so check if the complete block exists */
1946       flg = PETSC_TRUE;
1947       for (k=1; k<bs; k++) {
1948         if (row+k != idx[i+k]) { /* break in the block */
1949           flg = PETSC_FALSE;
1950           break;
1951         }
1952       }
1953       if (flg) { /* No break in the bs */
1954         sizes[j] = bs;
1955         i+= bs;
1956       } else {
1957         sizes[j] = 1;
1958         i++;
1959       }
1960     }
1961   }
1962   *bs_max = j;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1968 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1969 {
1970   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1971   PetscErrorCode ierr;
1972   PetscInt       i,j,k,count,*rows;
1973   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1974   PetscScalar    zero = 0.0;
1975   MatScalar      *aa;
1976 
1977   PetscFunctionBegin;
1978   /* Make a copy of the IS and  sort it */
1979   /* allocate memory for rows,sizes */
1980   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1981   sizes = rows + is_n;
1982 
1983   /* copy IS values to rows, and sort them */
1984   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1985   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1986   if (baij->keepzeroedrows) {
1987     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1988     bs_max = is_n;
1989     A->same_nonzero = PETSC_TRUE;
1990   } else {
1991     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1992     A->same_nonzero = PETSC_FALSE;
1993   }
1994 
1995   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1996     row   = rows[j];
1997     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1998     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1999     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2000     if (sizes[i] == bs && !baij->keepzeroedrows) {
2001       if (diag != 0.0) {
2002         if (baij->ilen[row/bs] > 0) {
2003           baij->ilen[row/bs]       = 1;
2004           baij->j[baij->i[row/bs]] = row/bs;
2005           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2006         }
2007         /* Now insert all the diagonal values for this bs */
2008         for (k=0; k<bs; k++) {
2009           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2010         }
2011       } else { /* (diag == 0.0) */
2012         baij->ilen[row/bs] = 0;
2013       } /* end (diag == 0.0) */
2014     } else { /* (sizes[i] != bs) */
2015 #if defined (PETSC_USE_DEBUG)
2016       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2017 #endif
2018       for (k=0; k<count; k++) {
2019         aa[0] =  zero;
2020         aa    += bs;
2021       }
2022       if (diag != 0.0) {
2023         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2024       }
2025     }
2026   }
2027 
2028   ierr = PetscFree(rows);CHKERRQ(ierr);
2029   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2035 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2036 {
2037   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2038   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2039   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2040   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2041   PetscErrorCode ierr;
2042   PetscInt       ridx,cidx,bs2=a->bs2;
2043   PetscTruth     roworiented=a->roworiented;
2044   MatScalar      *ap,value,*aa=a->a,*bap;
2045 
2046   PetscFunctionBegin;
2047   for (k=0; k<m; k++) { /* loop over added rows */
2048     row  = im[k];
2049     brow = row/bs;
2050     if (row < 0) continue;
2051 #if defined(PETSC_USE_DEBUG)
2052     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2053 #endif
2054     rp   = aj + ai[brow];
2055     ap   = aa + bs2*ai[brow];
2056     rmax = imax[brow];
2057     nrow = ailen[brow];
2058     low  = 0;
2059     high = nrow;
2060     for (l=0; l<n; l++) { /* loop over added columns */
2061       if (in[l] < 0) continue;
2062 #if defined(PETSC_USE_DEBUG)
2063       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2064 #endif
2065       col = in[l]; bcol = col/bs;
2066       ridx = row % bs; cidx = col % bs;
2067       if (roworiented) {
2068         value = v[l + k*n];
2069       } else {
2070         value = v[k + l*m];
2071       }
2072       if (col <= lastcol) low = 0; else high = nrow;
2073       lastcol = col;
2074       while (high-low > 7) {
2075         t = (low+high)/2;
2076         if (rp[t] > bcol) high = t;
2077         else              low  = t;
2078       }
2079       for (i=low; i<high; i++) {
2080         if (rp[i] > bcol) break;
2081         if (rp[i] == bcol) {
2082           bap  = ap +  bs2*i + bs*cidx + ridx;
2083           if (is == ADD_VALUES) *bap += value;
2084           else                  *bap  = value;
2085           goto noinsert1;
2086         }
2087       }
2088       if (nonew == 1) goto noinsert1;
2089       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2090       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2091       N = nrow++ - 1; high++;
2092       /* shift up all the later entries in this row */
2093       for (ii=N; ii>=i; ii--) {
2094         rp[ii+1] = rp[ii];
2095         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2096       }
2097       if (N>=i) {
2098         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2099       }
2100       rp[i]                      = bcol;
2101       ap[bs2*i + bs*cidx + ridx] = value;
2102       a->nz++;
2103       noinsert1:;
2104       low = i;
2105     }
2106     ailen[brow] = nrow;
2107   }
2108   A->same_nonzero = PETSC_FALSE;
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2115 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2116 {
2117   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2118   Mat            outA;
2119   PetscErrorCode ierr;
2120   PetscTruth     row_identity,col_identity;
2121 
2122   PetscFunctionBegin;
2123   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2124   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2125   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2126   if (!row_identity || !col_identity) {
2127     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2128   }
2129 
2130   outA          = inA;
2131   inA->factor   = MAT_FACTOR_LU;
2132 
2133   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2134 
2135   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2136   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2137   a->row = row;
2138   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2139   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2140   a->col = col;
2141 
2142   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2143   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2145 
2146   /*
2147       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2148       for ILU(0) factorization with natural ordering
2149   */
2150   if (inA->rmap.bs < 8) {
2151     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
2152   } else {
2153     if (!a->solve_work) {
2154       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2155       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2156     }
2157   }
2158 
2159   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2160 
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 EXTERN_C_BEGIN
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2167 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2168 {
2169   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2170   PetscInt    i,nz,nbs;
2171 
2172   PetscFunctionBegin;
2173   nz  = baij->maxnz/baij->bs2;
2174   nbs = baij->nbs;
2175   for (i=0; i<nz; i++) {
2176     baij->j[i] = indices[i];
2177   }
2178   baij->nz = nz;
2179   for (i=0; i<nbs; i++) {
2180     baij->ilen[i] = baij->imax[i];
2181   }
2182 
2183   PetscFunctionReturn(0);
2184 }
2185 EXTERN_C_END
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2189 /*@
2190     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2191        in the matrix.
2192 
2193   Input Parameters:
2194 +  mat - the SeqBAIJ matrix
2195 -  indices - the column indices
2196 
2197   Level: advanced
2198 
2199   Notes:
2200     This can be called if you have precomputed the nonzero structure of the
2201   matrix and want to provide it to the matrix object to improve the performance
2202   of the MatSetValues() operation.
2203 
2204     You MUST have set the correct numbers of nonzeros per row in the call to
2205   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2206 
2207     MUST be called before any calls to MatSetValues();
2208 
2209 @*/
2210 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2211 {
2212   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2213 
2214   PetscFunctionBegin;
2215   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2216   PetscValidPointer(indices,2);
2217   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2218   if (f) {
2219     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2220   } else {
2221     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2222   }
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2228 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2229 {
2230   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2231   PetscErrorCode ierr;
2232   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2233   PetscReal      atmp;
2234   PetscScalar    *x,zero = 0.0;
2235   MatScalar      *aa;
2236   PetscInt       ncols,brow,krow,kcol;
2237 
2238   PetscFunctionBegin;
2239   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2240   bs   = A->rmap.bs;
2241   aa   = a->a;
2242   ai   = a->i;
2243   aj   = a->j;
2244   mbs  = a->mbs;
2245 
2246   ierr = VecSet(v,zero);CHKERRQ(ierr);
2247   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2248   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2249   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2250   for (i=0; i<mbs; i++) {
2251     ncols = ai[1] - ai[0]; ai++;
2252     brow  = bs*i;
2253     for (j=0; j<ncols; j++){
2254       for (kcol=0; kcol<bs; kcol++){
2255         for (krow=0; krow<bs; krow++){
2256           atmp = PetscAbsScalar(*aa);aa++;
2257           row = brow + krow;    /* row index */
2258           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2259           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2260         }
2261       }
2262       aj++;
2263     }
2264   }
2265   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatCopy_SeqBAIJ"
2271 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2272 {
2273   PetscErrorCode ierr;
2274 
2275   PetscFunctionBegin;
2276   /* If the two matrices have the same copy implementation, use fast copy. */
2277   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2278     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2279     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2280 
2281     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2282       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2283     }
2284     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
2285   } else {
2286     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2293 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2294 {
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2304 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2305 {
2306   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2307   PetscFunctionBegin;
2308   *array = a->a;
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 #undef __FUNCT__
2313 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2314 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2315 {
2316   PetscFunctionBegin;
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #include "petscblaslapack.h"
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2323 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2324 {
2325   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2326   PetscErrorCode ierr;
2327   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2328   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2329 
2330   PetscFunctionBegin;
2331   if (str == SAME_NONZERO_PATTERN) {
2332     PetscScalar alpha = a;
2333     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2334   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2335     if (y->xtoy && y->XtoY != X) {
2336       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2337       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2338     }
2339     if (!y->xtoy) { /* get xtoy */
2340       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2341       y->XtoY = X;
2342     }
2343     bs2 = bs*bs;
2344     for (i=0; i<x->nz; i++) {
2345       j = 0;
2346       while (j < bs2){
2347         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2348         j++;
2349       }
2350     }
2351     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2352   } else {
2353     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2354   }
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2360 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2361 {
2362   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2363   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2364   MatScalar      *aa = a->a;
2365 
2366   PetscFunctionBegin;
2367   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2373 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2374 {
2375   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2376   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2377   MatScalar      *aa = a->a;
2378 
2379   PetscFunctionBegin;
2380   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 
2385 /* -------------------------------------------------------------------*/
2386 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2387        MatGetRow_SeqBAIJ,
2388        MatRestoreRow_SeqBAIJ,
2389        MatMult_SeqBAIJ_N,
2390 /* 4*/ MatMultAdd_SeqBAIJ_N,
2391        MatMultTranspose_SeqBAIJ,
2392        MatMultTransposeAdd_SeqBAIJ,
2393        MatSolve_SeqBAIJ_N,
2394        0,
2395        0,
2396 /*10*/ 0,
2397        MatLUFactor_SeqBAIJ,
2398        0,
2399        0,
2400        MatTranspose_SeqBAIJ,
2401 /*15*/ MatGetInfo_SeqBAIJ,
2402        MatEqual_SeqBAIJ,
2403        MatGetDiagonal_SeqBAIJ,
2404        MatDiagonalScale_SeqBAIJ,
2405        MatNorm_SeqBAIJ,
2406 /*20*/ 0,
2407        MatAssemblyEnd_SeqBAIJ,
2408        0,
2409        MatSetOption_SeqBAIJ,
2410        MatZeroEntries_SeqBAIJ,
2411 /*25*/ MatZeroRows_SeqBAIJ,
2412        MatLUFactorSymbolic_SeqBAIJ,
2413        MatLUFactorNumeric_SeqBAIJ_N,
2414        MatCholeskyFactorSymbolic_SeqBAIJ,
2415        MatCholeskyFactorNumeric_SeqBAIJ_N,
2416 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2417        MatILUFactorSymbolic_SeqBAIJ,
2418        MatICCFactorSymbolic_SeqBAIJ,
2419        MatGetArray_SeqBAIJ,
2420        MatRestoreArray_SeqBAIJ,
2421 /*35*/ MatDuplicate_SeqBAIJ,
2422        0,
2423        0,
2424        MatILUFactor_SeqBAIJ,
2425        0,
2426 /*40*/ MatAXPY_SeqBAIJ,
2427        MatGetSubMatrices_SeqBAIJ,
2428        MatIncreaseOverlap_SeqBAIJ,
2429        MatGetValues_SeqBAIJ,
2430        MatCopy_SeqBAIJ,
2431 /*45*/ 0,
2432        MatScale_SeqBAIJ,
2433        0,
2434        0,
2435        0,
2436 /*50*/ 0,
2437        MatGetRowIJ_SeqBAIJ,
2438        MatRestoreRowIJ_SeqBAIJ,
2439        0,
2440        0,
2441 /*55*/ 0,
2442        0,
2443        0,
2444        0,
2445        MatSetValuesBlocked_SeqBAIJ,
2446 /*60*/ MatGetSubMatrix_SeqBAIJ,
2447        MatDestroy_SeqBAIJ,
2448        MatView_SeqBAIJ,
2449        0,
2450        0,
2451 /*65*/ 0,
2452        0,
2453        0,
2454        0,
2455        0,
2456 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2457        MatConvert_Basic,
2458        0,
2459        0,
2460        0,
2461 /*75*/ 0,
2462        0,
2463        0,
2464        0,
2465        0,
2466 /*80*/ 0,
2467        0,
2468        0,
2469        0,
2470        MatLoad_SeqBAIJ,
2471 /*85*/ 0,
2472        0,
2473        0,
2474        0,
2475        0,
2476 /*90*/ 0,
2477        0,
2478        0,
2479        0,
2480        0,
2481 /*95*/ 0,
2482        0,
2483        0,
2484        0,
2485        0,
2486 /*100*/0,
2487        0,
2488        0,
2489        0,
2490        0,
2491 /*105*/0,
2492        MatRealPart_SeqBAIJ,
2493        MatImaginaryPart_SeqBAIJ,
2494        0,
2495        0,
2496 /*110*/0,
2497        0,
2498        0,
2499        0,
2500        MatMissingDiagonal_SeqBAIJ
2501 /*115*/
2502 };
2503 
2504 EXTERN_C_BEGIN
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2507 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2508 {
2509   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2510   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2511   PetscErrorCode ierr;
2512 
2513   PetscFunctionBegin;
2514   if (aij->nonew != 1) {
2515     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2516   }
2517 
2518   /* allocate space for values if not already there */
2519   if (!aij->saved_values) {
2520     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2521   }
2522 
2523   /* copy values over */
2524   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
2527 EXTERN_C_END
2528 
2529 EXTERN_C_BEGIN
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2532 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2533 {
2534   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2535   PetscErrorCode ierr;
2536   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2537 
2538   PetscFunctionBegin;
2539   if (aij->nonew != 1) {
2540     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2541   }
2542   if (!aij->saved_values) {
2543     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2544   }
2545 
2546   /* copy values over */
2547   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 EXTERN_C_END
2551 
2552 EXTERN_C_BEGIN
2553 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2554 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2555 EXTERN_C_END
2556 
2557 EXTERN_C_BEGIN
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2560 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2561 {
2562   Mat_SeqBAIJ    *b;
2563   PetscErrorCode ierr;
2564   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2565   PetscTruth     flg,skipallocation = PETSC_FALSE;
2566 
2567   PetscFunctionBegin;
2568 
2569   if (nz == MAT_SKIP_ALLOCATION) {
2570     skipallocation = PETSC_TRUE;
2571     nz             = 0;
2572   }
2573 
2574   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2575     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2576   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2577 
2578   if (nnz && newbs != bs) {
2579     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2580   }
2581   bs   = newbs;
2582 
2583   B->rmap.bs = B->cmap.bs = bs;
2584   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2585   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2586 
2587   B->preallocated = PETSC_TRUE;
2588 
2589   mbs  = B->rmap.n/bs;
2590   nbs  = B->cmap.n/bs;
2591   bs2  = bs*bs;
2592 
2593   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2594     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2595   }
2596 
2597   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2598   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2599   if (nnz) {
2600     for (i=0; i<mbs; i++) {
2601       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2602       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2603     }
2604   }
2605 
2606   b       = (Mat_SeqBAIJ*)B->data;
2607   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2608     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2609   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2610 
2611   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2612   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2613   if (!flg) {
2614     switch (bs) {
2615     case 1:
2616       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2617       B->ops->mult            = MatMult_SeqBAIJ_1;
2618       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2619       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2620       break;
2621     case 2:
2622       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2623       B->ops->mult            = MatMult_SeqBAIJ_2;
2624       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2625       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2626       break;
2627     case 3:
2628       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2629       B->ops->mult            = MatMult_SeqBAIJ_3;
2630       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2631       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2632       break;
2633     case 4:
2634       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2635       B->ops->mult            = MatMult_SeqBAIJ_4;
2636       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2637       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2638       break;
2639     case 5:
2640       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2641       B->ops->mult            = MatMult_SeqBAIJ_5;
2642       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2643       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2644       break;
2645     case 6:
2646       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2647       B->ops->mult            = MatMult_SeqBAIJ_6;
2648       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2649       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2650       break;
2651     case 7:
2652       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2653       B->ops->mult            = MatMult_SeqBAIJ_7;
2654       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2655       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2656       break;
2657     default:
2658       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2659       B->ops->mult            = MatMult_SeqBAIJ_N;
2660       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2661       break;
2662     }
2663   }
2664   B->rmap.bs      = bs;
2665   b->mbs     = mbs;
2666   b->nbs     = nbs;
2667   if (!skipallocation) {
2668     if (!b->imax) {
2669       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2670     }
2671     /* b->ilen will count nonzeros in each block row so far. */
2672     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2673     if (!nnz) {
2674       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2675       else if (nz <= 0)        nz = 1;
2676       for (i=0; i<mbs; i++) b->imax[i] = nz;
2677       nz = nz*mbs;
2678     } else {
2679       nz = 0;
2680       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2681     }
2682 
2683     /* allocate the matrix space */
2684     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2685     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2686     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2687     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2688     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2689     b->singlemalloc = PETSC_TRUE;
2690     b->i[0] = 0;
2691     for (i=1; i<mbs+1; i++) {
2692       b->i[i] = b->i[i-1] + b->imax[i-1];
2693     }
2694     b->free_a     = PETSC_TRUE;
2695     b->free_ij    = PETSC_TRUE;
2696   } else {
2697     b->free_a     = PETSC_FALSE;
2698     b->free_ij    = PETSC_FALSE;
2699   }
2700 
2701   B->rmap.bs          = bs;
2702   b->bs2              = bs2;
2703   b->mbs              = mbs;
2704   b->nz               = 0;
2705   b->maxnz            = nz*bs2;
2706   B->info.nz_unneeded = (PetscReal)b->maxnz;
2707   PetscFunctionReturn(0);
2708 }
2709 EXTERN_C_END
2710 
2711 EXTERN_C_BEGIN
2712 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2713 EXTERN_C_END
2714 
2715 /*MC
2716    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2717    block sparse compressed row format.
2718 
2719    Options Database Keys:
2720 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2721 
2722   Level: beginner
2723 
2724 .seealso: MatCreateSeqBAIJ()
2725 M*/
2726 
2727 
2728 EXTERN_C_BEGIN
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatCreate_SeqBAIJ"
2731 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2732 {
2733   PetscErrorCode ierr;
2734   PetscMPIInt    size;
2735   Mat_SeqBAIJ    *b;
2736 
2737   PetscFunctionBegin;
2738   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2739   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2740 
2741   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2742   B->data = (void*)b;
2743   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2744   B->mapping          = 0;
2745   b->row              = 0;
2746   b->col              = 0;
2747   b->icol             = 0;
2748   b->reallocs         = 0;
2749   b->saved_values     = 0;
2750 
2751   b->roworiented      = PETSC_TRUE;
2752   b->nonew            = 0;
2753   b->diag             = 0;
2754   b->solve_work       = 0;
2755   b->mult_work        = 0;
2756   B->spptr            = 0;
2757   B->info.nz_unneeded = (PetscReal)b->maxnz;
2758   b->keepzeroedrows   = PETSC_FALSE;
2759   b->xtoy              = 0;
2760   b->XtoY              = 0;
2761   b->compressedrow.use     = PETSC_FALSE;
2762   b->compressedrow.nrows   = 0;
2763   b->compressedrow.i       = PETSC_NULL;
2764   b->compressedrow.rindex  = PETSC_NULL;
2765   b->compressedrow.checked = PETSC_FALSE;
2766   B->same_nonzero          = PETSC_FALSE;
2767 
2768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2769                                      "MatGetFactor_seqbaij_petsc",
2770                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2772                                      "MatInvertBlockDiagonal_SeqBAIJ",
2773                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2775                                      "MatStoreValues_SeqBAIJ",
2776                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2778                                      "MatRetrieveValues_SeqBAIJ",
2779                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2781                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2782                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2783   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2784                                      "MatConvert_SeqBAIJ_SeqAIJ",
2785                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2786   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2787                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2788                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2789   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2790                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2791                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2792   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2793   PetscFunctionReturn(0);
2794 }
2795 EXTERN_C_END
2796 
2797 #undef __FUNCT__
2798 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2799 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2800 {
2801   Mat            C = *B;
2802   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2803   PetscErrorCode ierr;
2804   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2805 
2806   PetscFunctionBegin;
2807   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2808 
2809   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2810   for (i=0; i<mbs; i++) {
2811     c->imax[i] = a->imax[i];
2812     c->ilen[i] = a->ilen[i];
2813   }
2814 
2815   /* allocate the matrix space */
2816   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2817   c->singlemalloc = PETSC_TRUE;
2818   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2819   if (mbs > 0) {
2820     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2821     if (cpvalues == MAT_COPY_VALUES) {
2822       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2823     } else {
2824       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2825     }
2826   }
2827   c->roworiented = a->roworiented;
2828   c->nonew       = a->nonew;
2829   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
2830   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
2831   c->bs2         = a->bs2;
2832   c->mbs         = a->mbs;
2833   c->nbs         = a->nbs;
2834 
2835   if (a->diag) {
2836     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2837     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2838     for (i=0; i<mbs; i++) {
2839       c->diag[i] = a->diag[i];
2840     }
2841   } else c->diag        = 0;
2842   c->nz                 = a->nz;
2843   c->maxnz              = a->maxnz;
2844   c->solve_work         = 0;
2845   c->mult_work          = 0;
2846   c->free_a             = PETSC_TRUE;
2847   c->free_ij            = PETSC_TRUE;
2848   C->preallocated       = PETSC_TRUE;
2849   C->assembled          = PETSC_TRUE;
2850 
2851   c->compressedrow.use     = a->compressedrow.use;
2852   c->compressedrow.nrows   = a->compressedrow.nrows;
2853   c->compressedrow.checked = a->compressedrow.checked;
2854   if ( a->compressedrow.checked && a->compressedrow.use){
2855     i = a->compressedrow.nrows;
2856     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2857     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2858     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2859     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2860   } else {
2861     c->compressedrow.use    = PETSC_FALSE;
2862     c->compressedrow.i      = PETSC_NULL;
2863     c->compressedrow.rindex = PETSC_NULL;
2864   }
2865   C->same_nonzero = A->same_nonzero;
2866   *B = C;
2867   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2873 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2874 {
2875   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2876   PetscErrorCode ierr;
2877   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2878 
2879   PetscFunctionBegin;
2880   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2881   ierr = MatSetSizes(*B,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2882   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2883   ierr = MatDuplicateNoCreate_SeqBAIJ(A,cpvalues,B);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatLoad_SeqBAIJ"
2889 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2890 {
2891   Mat_SeqBAIJ    *a;
2892   Mat            B;
2893   PetscErrorCode ierr;
2894   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2895   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2896   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2897   PetscInt       *masked,nmask,tmp,bs2,ishift;
2898   PetscMPIInt    size;
2899   int            fd;
2900   PetscScalar    *aa;
2901   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2902 
2903   PetscFunctionBegin;
2904   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2905     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2906   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2907   bs2  = bs*bs;
2908 
2909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2910   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2911   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2912   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2913   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2914   M = header[1]; N = header[2]; nz = header[3];
2915 
2916   if (header[3] < 0) {
2917     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2918   }
2919 
2920   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2921 
2922   /*
2923      This code adds extra rows to make sure the number of rows is
2924     divisible by the blocksize
2925   */
2926   mbs        = M/bs;
2927   extra_rows = bs - M + bs*(mbs);
2928   if (extra_rows == bs) extra_rows = 0;
2929   else                  mbs++;
2930   if (extra_rows) {
2931     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2932   }
2933 
2934   /* read in row lengths */
2935   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2936   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2937   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2938 
2939   /* read in column indices */
2940   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2941   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2942   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2943 
2944   /* loop over row lengths determining block row lengths */
2945   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2946   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2947   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2948   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2949   masked   = mask + mbs;
2950   rowcount = 0; nzcount = 0;
2951   for (i=0; i<mbs; i++) {
2952     nmask = 0;
2953     for (j=0; j<bs; j++) {
2954       kmax = rowlengths[rowcount];
2955       for (k=0; k<kmax; k++) {
2956         tmp = jj[nzcount++]/bs;
2957         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2958       }
2959       rowcount++;
2960     }
2961     browlengths[i] += nmask;
2962     /* zero out the mask elements we set */
2963     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2964   }
2965 
2966   /* create our matrix */
2967   ierr = MatCreate(comm,&B);
2968   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2969   ierr = MatSetType(B,type);CHKERRQ(ierr);
2970   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2971   a = (Mat_SeqBAIJ*)B->data;
2972 
2973   /* set matrix "i" values */
2974   a->i[0] = 0;
2975   for (i=1; i<= mbs; i++) {
2976     a->i[i]      = a->i[i-1] + browlengths[i-1];
2977     a->ilen[i-1] = browlengths[i-1];
2978   }
2979   a->nz         = 0;
2980   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2981 
2982   /* read in nonzero values */
2983   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2984   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2985   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2986 
2987   /* set "a" and "j" values into matrix */
2988   nzcount = 0; jcount = 0;
2989   for (i=0; i<mbs; i++) {
2990     nzcountb = nzcount;
2991     nmask    = 0;
2992     for (j=0; j<bs; j++) {
2993       kmax = rowlengths[i*bs+j];
2994       for (k=0; k<kmax; k++) {
2995         tmp = jj[nzcount++]/bs;
2996 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2997       }
2998     }
2999     /* sort the masked values */
3000     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3001 
3002     /* set "j" values into matrix */
3003     maskcount = 1;
3004     for (j=0; j<nmask; j++) {
3005       a->j[jcount++]  = masked[j];
3006       mask[masked[j]] = maskcount++;
3007     }
3008     /* set "a" values into matrix */
3009     ishift = bs2*a->i[i];
3010     for (j=0; j<bs; j++) {
3011       kmax = rowlengths[i*bs+j];
3012       for (k=0; k<kmax; k++) {
3013         tmp       = jj[nzcountb]/bs ;
3014         block     = mask[tmp] - 1;
3015         point     = jj[nzcountb] - bs*tmp;
3016         idx       = ishift + bs2*block + j + bs*point;
3017         a->a[idx] = (MatScalar)aa[nzcountb++];
3018       }
3019     }
3020     /* zero out the mask elements we set */
3021     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3022   }
3023   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3024 
3025   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3026   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3027   ierr = PetscFree(aa);CHKERRQ(ierr);
3028   ierr = PetscFree(jj);CHKERRQ(ierr);
3029   ierr = PetscFree(mask);CHKERRQ(ierr);
3030 
3031   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033   ierr = MatView_Private(B);CHKERRQ(ierr);
3034 
3035   *A = B;
3036   PetscFunctionReturn(0);
3037 }
3038 
3039 #undef __FUNCT__
3040 #define __FUNCT__ "MatCreateSeqBAIJ"
3041 /*@C
3042    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3043    compressed row) format.  For good matrix assembly performance the
3044    user should preallocate the matrix storage by setting the parameter nz
3045    (or the array nnz).  By setting these parameters accurately, performance
3046    during matrix assembly can be increased by more than a factor of 50.
3047 
3048    Collective on MPI_Comm
3049 
3050    Input Parameters:
3051 +  comm - MPI communicator, set to PETSC_COMM_SELF
3052 .  bs - size of block
3053 .  m - number of rows
3054 .  n - number of columns
3055 .  nz - number of nonzero blocks  per block row (same for all rows)
3056 -  nnz - array containing the number of nonzero blocks in the various block rows
3057          (possibly different for each block row) or PETSC_NULL
3058 
3059    Output Parameter:
3060 .  A - the matrix
3061 
3062    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3063    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3064    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3065    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3066 
3067    Options Database Keys:
3068 .   -mat_no_unroll - uses code that does not unroll the loops in the
3069                      block calculations (much slower)
3070 .    -mat_block_size - size of the blocks to use
3071 
3072    Level: intermediate
3073 
3074    Notes:
3075    The number of rows and columns must be divisible by blocksize.
3076 
3077    If the nnz parameter is given then the nz parameter is ignored
3078 
3079    A nonzero block is any block that as 1 or more nonzeros in it
3080 
3081    The block AIJ format is fully compatible with standard Fortran 77
3082    storage.  That is, the stored row and column indices can begin at
3083    either one (as in Fortran) or zero.  See the users' manual for details.
3084 
3085    Specify the preallocated storage with either nz or nnz (not both).
3086    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3087    allocation.  For additional details, see the users manual chapter on
3088    matrices.
3089 
3090 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3091 @*/
3092 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3093 {
3094   PetscErrorCode ierr;
3095 
3096   PetscFunctionBegin;
3097   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3098   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3099   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3100   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 #undef __FUNCT__
3105 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3106 /*@C
3107    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3108    per row in the matrix. For good matrix assembly performance the
3109    user should preallocate the matrix storage by setting the parameter nz
3110    (or the array nnz).  By setting these parameters accurately, performance
3111    during matrix assembly can be increased by more than a factor of 50.
3112 
3113    Collective on MPI_Comm
3114 
3115    Input Parameters:
3116 +  A - the matrix
3117 .  bs - size of block
3118 .  nz - number of block nonzeros per block row (same for all rows)
3119 -  nnz - array containing the number of block nonzeros in the various block rows
3120          (possibly different for each block row) or PETSC_NULL
3121 
3122    Options Database Keys:
3123 .   -mat_no_unroll - uses code that does not unroll the loops in the
3124                      block calculations (much slower)
3125 .    -mat_block_size - size of the blocks to use
3126 
3127    Level: intermediate
3128 
3129    Notes:
3130    If the nnz parameter is given then the nz parameter is ignored
3131 
3132    You can call MatGetInfo() to get information on how effective the preallocation was;
3133    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3134    You can also run with the option -info and look for messages with the string
3135    malloc in them to see if additional memory allocation was needed.
3136 
3137    The block AIJ format is fully compatible with standard Fortran 77
3138    storage.  That is, the stored row and column indices can begin at
3139    either one (as in Fortran) or zero.  See the users' manual for details.
3140 
3141    Specify the preallocated storage with either nz or nnz (not both).
3142    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3143    allocation.  For additional details, see the users manual chapter on
3144    matrices.
3145 
3146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3147 @*/
3148 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3149 {
3150   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3151 
3152   PetscFunctionBegin;
3153   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3154   if (f) {
3155     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3156   }
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3162 /*@
3163      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3164               (upper triangular entries in CSR format) provided by the user.
3165 
3166      Collective on MPI_Comm
3167 
3168    Input Parameters:
3169 +  comm - must be an MPI communicator of size 1
3170 .  bs - size of block
3171 .  m - number of rows
3172 .  n - number of columns
3173 .  i - row indices
3174 .  j - column indices
3175 -  a - matrix values
3176 
3177    Output Parameter:
3178 .  mat - the matrix
3179 
3180    Level: intermediate
3181 
3182    Notes:
3183        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3184     once the matrix is destroyed
3185 
3186        You cannot set new nonzero locations into this matrix, that will generate an error.
3187 
3188        The i and j indices are 0 based
3189 
3190 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3191 
3192 @*/
3193 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3194 {
3195   PetscErrorCode ierr;
3196   PetscInt       ii;
3197   Mat_SeqBAIJ    *baij;
3198 
3199   PetscFunctionBegin;
3200   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3201   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3202 
3203   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3204   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3205   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3206   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3207   baij = (Mat_SeqBAIJ*)(*mat)->data;
3208   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3209 
3210   baij->i = i;
3211   baij->j = j;
3212   baij->a = a;
3213   baij->singlemalloc = PETSC_FALSE;
3214   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3215   baij->free_a       = PETSC_FALSE;
3216   baij->free_ij       = PETSC_FALSE;
3217 
3218   for (ii=0; ii<m; ii++) {
3219     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3220 #if defined(PETSC_USE_DEBUG)
3221     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3222 #endif
3223   }
3224 #if defined(PETSC_USE_DEBUG)
3225   for (ii=0; ii<baij->i[m]; ii++) {
3226     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3227     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3228   }
3229 #endif
3230 
3231   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3232   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3233   PetscFunctionReturn(0);
3234 }
3235