xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8ab949d8a86daa69f82db2e3a2e7f07f1f4d47d0)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "../src/mat/blockinvert.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr,(*f)(Mat);
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
28   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30 
31   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
32   if (f) {
33     ierr = (*f)(mat);CHKERRQ(ierr);
34   } else {
35     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
36   }
37   PetscFunctionReturn(0);
38 }
39 
40 EXTERN_C_BEGIN
41 #undef __FUNCT__
42 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
43 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
44 {
45   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
46   PetscErrorCode ierr;
47   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
48   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25];
49   PetscReal      shift = 0.0;
50 
51   PetscFunctionBegin;
52   if (a->idiagvalid) PetscFunctionReturn(0);
53   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
54   diag_offset = a->diag;
55   if (!a->idiag) {
56     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
57     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
58   }
59   diag  = a->idiag;
60   mdiag = a->idiag+bs*bs*mbs;
61   /* factor and invert each block */
62   switch (bs){
63     case 1:
64       for (i=0; i<mbs; i++) {
65         odiag = v + 1*diag_offset[i];
66         diag[0]  = odiag[0];
67         mdiag[0] = odiag[0];
68         diag[0]  = 1.0 / (diag[0] + shift);
69         diag    += 1;
70         mdiag   += 1;
71       }
72       break;
73     case 2:
74       for (i=0; i<mbs; i++) {
75         odiag   = v + 4*diag_offset[i];
76         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
78 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
79 	diag    += 4;
80 	mdiag   += 4;
81       }
82       break;
83     case 3:
84       for (i=0; i<mbs; i++) {
85         odiag    = v + 9*diag_offset[i];
86         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
87         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
88         diag[8]  = odiag[8];
89         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
90         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
91         mdiag[8] = odiag[8];
92 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
93 	diag    += 9;
94 	mdiag   += 9;
95       }
96       break;
97     case 4:
98       for (i=0; i<mbs; i++) {
99         odiag  = v + 16*diag_offset[i];
100         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
101         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
103 	diag  += 16;
104 	mdiag += 16;
105       }
106       break;
107     case 5:
108       for (i=0; i<mbs; i++) {
109         odiag = v + 25*diag_offset[i];
110         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
111         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
113 	diag  += 25;
114 	mdiag += 25;
115       }
116       break;
117     case 6:
118       for (i=0; i<mbs; i++) {
119         odiag = v + 36*diag_offset[i];
120         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
121         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
123 	diag  += 36;
124 	mdiag += 36;
125       }
126       break;
127     default:
128       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   PetscFunctionReturn(0);
132 }
133 EXTERN_C_END
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
137 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
138 {
139   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
140   PetscScalar        *x,x1,s1;
141   const PetscScalar  *b;
142   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
143   PetscErrorCode     ierr;
144   PetscInt           m = a->mbs,i,i2,nz,j;
145   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
149   its = its*lits;
150   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
155 
156   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
157 
158   diag  = a->diag;
159   idiag = a->idiag;
160   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
161   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
162 
163   if (flag & SOR_ZERO_INITIAL_GUESS) {
164     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
165       x[0] = b[0]*idiag[0];
166       i2     = 1;
167       idiag += 1;
168       for (i=1; i<m; i++) {
169         v     = aa + ai[i];
170         vi    = aj + ai[i];
171         nz    = diag[i] - ai[i];
172         s1    = b[i2];
173         for (j=0; j<nz; j++) {
174           s1 -= v[j]*x[vi[j]];
175         }
176         x[i2]   = idiag[0]*s1;
177         idiag   += 1;
178         i2      += 1;
179       }
180       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
181       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
182     }
183     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
184         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
185       i2    = 0;
186       mdiag = a->idiag+a->mbs;
187       for (i=0; i<m; i++) {
188         x1      = x[i2];
189         x[i2]   = mdiag[0]*x1;
190         mdiag  += 1;
191         i2     += 1;
192       }
193       ierr = PetscLogFlops(m);CHKERRQ(ierr);
194     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
195       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
196     }
197     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
198       idiag   = a->idiag+a->mbs - 1;
199       i2      = m - 1;
200       x1      = x[i2];
201       x[i2]   = idiag[0]*x1;
202       idiag -= 1;
203       i2    -= 1;
204       for (i=m-2; i>=0; i--) {
205         v     = aa + (diag[i]+1);
206         vi    = aj + diag[i] + 1;
207         nz    = ai[i+1] - diag[i] - 1;
208         s1    = x[i2];
209         for (j=0; j<nz; j++) {
210           s1 -= v[j]*x[vi[j]];
211         }
212         x[i2]   = idiag[0]*s1;
213         idiag   -= 1;
214         i2      -= 1;
215       }
216       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
217     }
218   } else {
219     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
220   }
221   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
222   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
228 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
229 {
230   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
231   PetscScalar        *x,x1,x2,s1,s2;
232   const PetscScalar  *b;
233   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
234   PetscErrorCode     ierr;
235   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
236   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
237 
238   PetscFunctionBegin;
239   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
240   its = its*lits;
241   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
242   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
243   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
244   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
245   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
246 
247   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
248 
249   diag  = a->diag;
250   idiag = a->idiag;
251   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
252   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
253 
254   if (flag & SOR_ZERO_INITIAL_GUESS) {
255     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
256       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
257       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
258       i2     = 2;
259       idiag += 4;
260       for (i=1; i<m; i++) {
261 	v     = aa + 4*ai[i];
262 	vi    = aj + ai[i];
263 	nz    = diag[i] - ai[i];
264 	s1    = b[i2]; s2 = b[i2+1];
265         for (j=0; j<nz; j++) {
266 	  idx  = 2*vi[j];
267           it   = 4*j;
268 	  x1   = x[idx]; x2 = x[1+idx];
269 	  s1  -= v[it]*x1 + v[it+2]*x2;
270 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
271 	}
272 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
273 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
274         idiag   += 4;
275         i2      += 2;
276       }
277       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
278       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
279     }
280     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
281         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
282       i2    = 0;
283       mdiag = a->idiag+4*a->mbs;
284       for (i=0; i<m; i++) {
285         x1      = x[i2]; x2 = x[i2+1];
286         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
287         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
288         mdiag  += 4;
289         i2     += 2;
290       }
291       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
292     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
293       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
294     }
295     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
296       idiag   = a->idiag+4*a->mbs - 4;
297       i2      = 2*m - 2;
298       x1      = x[i2]; x2 = x[i2+1];
299       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
300       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
301       idiag -= 4;
302       i2    -= 2;
303       for (i=m-2; i>=0; i--) {
304 	v     = aa + 4*(diag[i]+1);
305 	vi    = aj + diag[i] + 1;
306 	nz    = ai[i+1] - diag[i] - 1;
307 	s1    = x[i2]; s2 = x[i2+1];
308         for (j=0; j<nz; j++) {
309  	  idx  = 2*vi[j];
310           it   = 4*j;
311 	  x1   = x[idx]; x2 = x[1+idx];
312 	  s1  -= v[it]*x1 + v[it+2]*x2;
313 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
314 	}
315 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
316 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
317         idiag   -= 4;
318         i2      -= 2;
319       }
320       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
321     }
322   } else {
323     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
324   }
325   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
326   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
332 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
333 {
334   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
335   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
336   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
337   const PetscScalar  *b;
338   PetscErrorCode     ierr;
339   PetscInt           m = a->mbs,i,i2,nz,idx;
340   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
341 
342   PetscFunctionBegin;
343   its = its*lits;
344   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
345   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
346   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
347   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
348   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
349   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
350 
351   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
352 
353   diag  = a->diag;
354   idiag = a->idiag;
355   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
356   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
357 
358   if (flag & SOR_ZERO_INITIAL_GUESS) {
359     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
360       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
361       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
362       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
363       i2     = 3;
364       idiag += 9;
365       for (i=1; i<m; i++) {
366         v     = aa + 9*ai[i];
367         vi    = aj + ai[i];
368         nz    = diag[i] - ai[i];
369         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
370         while (nz--) {
371           idx  = 3*(*vi++);
372           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
373           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
374           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
375           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
376           v   += 9;
377         }
378         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
379         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
380         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
381         idiag   += 9;
382         i2      += 3;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
386     }
387     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
388         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
389       i2    = 0;
390       mdiag = a->idiag+9*a->mbs;
391       for (i=0; i<m; i++) {
392         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
393         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
394         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
395         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
396         mdiag  += 9;
397         i2     += 3;
398       }
399       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+9*a->mbs - 9;
405       i2      = 3*m - 3;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
407       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
408       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
409       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
410       idiag -= 9;
411       i2    -= 3;
412       for (i=m-2; i>=0; i--) {
413         v     = aa + 9*(diag[i]+1);
414         vi    = aj + diag[i] + 1;
415         nz    = ai[i+1] - diag[i] - 1;
416         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
417         while (nz--) {
418           idx  = 3*(*vi++);
419           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
420           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
421           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
422           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
423           v   += 9;
424         }
425         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
426         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
427         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
428         idiag   -= 9;
429         i2      -= 3;
430       }
431       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
432     }
433   } else {
434     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
435   }
436   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
437   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
443 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
444 {
445   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
446   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
447   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
448   const PetscScalar  *b;
449   PetscErrorCode     ierr;
450   PetscInt           m = a->mbs,i,i2,nz,idx;
451   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
452 
453   PetscFunctionBegin;
454   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
475       i2     = 4;
476       idiag += 16;
477       for (i=1; i<m; i++) {
478 	v     = aa + 16*ai[i];
479 	vi    = aj + ai[i];
480 	nz    = diag[i] - ai[i];
481 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
482 	while (nz--) {
483 	  idx  = 4*(*vi++);
484 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
485 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
486 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
487 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
488 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
489 	  v   += 16;
490 	}
491 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
492 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
493 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
494 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
495         idiag   += 16;
496         i2      += 4;
497       }
498       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
499       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
500     }
501     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
502         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
503       i2    = 0;
504       mdiag = a->idiag+16*a->mbs;
505       for (i=0; i<m; i++) {
506         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
507         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
508         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
509         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
510         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
511         mdiag  += 16;
512         i2     += 4;
513       }
514       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
515     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
516       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
517     }
518     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
519       idiag   = a->idiag+16*a->mbs - 16;
520       i2      = 4*m - 4;
521       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
522       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
523       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
524       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
525       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
526       idiag -= 16;
527       i2    -= 4;
528       for (i=m-2; i>=0; i--) {
529 	v     = aa + 16*(diag[i]+1);
530 	vi    = aj + diag[i] + 1;
531 	nz    = ai[i+1] - diag[i] - 1;
532 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
533 	while (nz--) {
534 	  idx  = 4*(*vi++);
535 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
536 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
537 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
538 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
539 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
540 	  v   += 16;
541 	}
542 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
543 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
544 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
545 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
546         idiag   -= 16;
547         i2      -= 4;
548       }
549       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
550     }
551   } else {
552     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
553   }
554   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
555   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
561 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
562 {
563   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
564   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
565   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
566   const PetscScalar  *b;
567   PetscErrorCode     ierr;
568   PetscInt           m = a->mbs,i,i2,nz,idx;
569   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
570 
571   PetscFunctionBegin;
572   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
573   its = its*lits;
574   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
575   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
576   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
577   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
578   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
579 
580   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
581 
582   diag  = a->diag;
583   idiag = a->idiag;
584   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
585   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
586 
587   if (flag & SOR_ZERO_INITIAL_GUESS) {
588     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
589       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
590       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
591       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
592       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
593       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
594       i2     = 5;
595       idiag += 25;
596       for (i=1; i<m; i++) {
597 	v     = aa + 25*ai[i];
598 	vi    = aj + ai[i];
599 	nz    = diag[i] - ai[i];
600 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
601 	while (nz--) {
602 	  idx  = 5*(*vi++);
603 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
604 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
605 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
606 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
607 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
608 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
609 	  v   += 25;
610 	}
611 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
612 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
613 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
614 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
615 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
616         idiag   += 25;
617         i2      += 5;
618       }
619       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
620       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
621     }
622     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
623         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
624       i2    = 0;
625       mdiag = a->idiag+25*a->mbs;
626       for (i=0; i<m; i++) {
627         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
628         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
629         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
630         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
631         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
632         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
633         mdiag  += 25;
634         i2     += 5;
635       }
636       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
637     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
638       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
639     }
640     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
641       idiag   = a->idiag+25*a->mbs - 25;
642       i2      = 5*m - 5;
643       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
644       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
645       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
646       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
647       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
648       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
649       idiag -= 25;
650       i2    -= 5;
651       for (i=m-2; i>=0; i--) {
652 	v     = aa + 25*(diag[i]+1);
653 	vi    = aj + diag[i] + 1;
654 	nz    = ai[i+1] - diag[i] - 1;
655 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
656 	while (nz--) {
657 	  idx  = 5*(*vi++);
658 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
659 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
660 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
661 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
662 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
663 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
664 	  v   += 25;
665 	}
666 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
667 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
668 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
669 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
670 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
671         idiag   -= 25;
672         i2      -= 5;
673       }
674       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
675     }
676   } else {
677     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
678   }
679   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
680   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
686 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
687 {
688   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
689   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
690   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
691   const PetscScalar  *b;
692   PetscErrorCode     ierr;
693   PetscInt           m = a->mbs,i,i2,nz,idx;
694   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
695 
696   PetscFunctionBegin;
697   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
698   its = its*lits;
699   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
700   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
701   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
702   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
703   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
704 
705   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
706 
707   diag  = a->diag;
708   idiag = a->idiag;
709   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
710   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
711 
712   if (flag & SOR_ZERO_INITIAL_GUESS) {
713     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
714       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
715       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
716       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
717       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
718       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
719       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
720       i2     = 6;
721       idiag += 36;
722       for (i=1; i<m; i++) {
723         v     = aa + 36*ai[i];
724         vi    = aj + ai[i];
725         nz    = diag[i] - ai[i];
726         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
727         while (nz--) {
728           idx  = 6*(*vi++);
729           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
730           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
731           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
732           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
733           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
734           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
735           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
736           v   += 36;
737         }
738         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
739         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
740         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
741         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
742         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
743         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
744         idiag   += 36;
745         i2      += 6;
746       }
747       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
748       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
749     }
750     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
751         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
752       i2    = 0;
753       mdiag = a->idiag+36*a->mbs;
754       for (i=0; i<m; i++) {
755         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
756         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
757         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
758         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
759         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
760         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
761         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
762         mdiag  += 36;
763         i2     += 6;
764       }
765       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
766     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
767       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
768     }
769     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
770       idiag   = a->idiag+36*a->mbs - 36;
771       i2      = 6*m - 6;
772       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
773       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
774       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
775       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
776       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
777       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
778       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
779       idiag -= 36;
780       i2    -= 6;
781       for (i=m-2; i>=0; i--) {
782         v     = aa + 36*(diag[i]+1);
783         vi    = aj + diag[i] + 1;
784         nz    = ai[i+1] - diag[i] - 1;
785         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
786         while (nz--) {
787           idx  = 6*(*vi++);
788           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
789           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
790           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
791           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
792           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
793           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
794           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
795           v   += 36;
796         }
797         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
798         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
799         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
800         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
801         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
802         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
803         idiag   -= 36;
804         i2      -= 6;
805       }
806       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
807     }
808   } else {
809     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
810   }
811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
812   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
818 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
819 {
820   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
821   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
822   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
823   const PetscScalar  *b;
824   PetscErrorCode     ierr;
825   PetscInt           m = a->mbs,i,i2,nz,idx;
826   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
827 
828   PetscFunctionBegin;
829   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
830   its = its*lits;
831   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
832   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
833   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
834   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
835   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
836 
837   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
838 
839   diag  = a->diag;
840   idiag = a->idiag;
841   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
842   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
843 
844   if (flag & SOR_ZERO_INITIAL_GUESS) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
846       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
847       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
848       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
849       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
850       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
851       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
852       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
853       i2     = 7;
854       idiag += 49;
855       for (i=1; i<m; i++) {
856         v     = aa + 49*ai[i];
857         vi    = aj + ai[i];
858         nz    = diag[i] - ai[i];
859         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
860         while (nz--) {
861           idx  = 7*(*vi++);
862           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
863           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
864           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
865           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
866           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
867           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
868           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
869           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
870           v   += 49;
871         }
872         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
873         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
874         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
875         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
876         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
877         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
878         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
879         idiag   += 49;
880         i2      += 7;
881       }
882       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
883       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
884     }
885     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
886         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
887       i2    = 0;
888       mdiag = a->idiag+49*a->mbs;
889       for (i=0; i<m; i++) {
890         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
891         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
892         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
893         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
894         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
895         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
896         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
897         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
898         mdiag  += 36;
899         i2     += 6;
900       }
901       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
902     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
903       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
904     }
905     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
906       idiag   = a->idiag+49*a->mbs - 49;
907       i2      = 7*m - 7;
908       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
909       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
910       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
911       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
912       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
913       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
914       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
915       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
916       idiag -= 49;
917       i2    -= 7;
918       for (i=m-2; i>=0; i--) {
919         v     = aa + 49*(diag[i]+1);
920         vi    = aj + diag[i] + 1;
921         nz    = ai[i+1] - diag[i] - 1;
922         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
923         while (nz--) {
924           idx  = 7*(*vi++);
925           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
926           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
927           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
928           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
929           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
930           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
931           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
932           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
933           v   += 49;
934         }
935         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
936         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
937         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
938         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
939         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
940         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
941         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
942         idiag   -= 49;
943         i2      -= 7;
944       }
945       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
946     }
947   } else {
948     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
949   }
950   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
951   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 EXTERN_C_BEGIN
965 #undef __FUNCT__
966 #define __FUNCT__ "matsetvaluesblocked4_"
967 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968 {
969   Mat               A = *AA;
970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972   PetscInt          *ai=a->i,*ailen=a->ilen;
973   PetscInt          *aj=a->j,stepval,lastcol = -1;
974   const PetscScalar *value = v;
975   MatScalar         *ap,*aa = a->a,*bap;
976 
977   PetscFunctionBegin;
978   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
979   stepval = (n-1)*4;
980   for (k=0; k<m; k++) { /* loop over added rows */
981     row  = im[k];
982     rp   = aj + ai[row];
983     ap   = aa + 16*ai[row];
984     nrow = ailen[row];
985     low  = 0;
986     high = nrow;
987     for (l=0; l<n; l++) { /* loop over added columns */
988       col = in[l];
989       if (col <= lastcol) low = 0; else high = nrow;
990       lastcol = col;
991       value = v + k*(stepval+4 + l)*4;
992       while (high-low > 7) {
993         t = (low+high)/2;
994         if (rp[t] > col) high = t;
995         else             low  = t;
996       }
997       for (i=low; i<high; i++) {
998         if (rp[i] > col) break;
999         if (rp[i] == col) {
1000           bap  = ap +  16*i;
1001           for (ii=0; ii<4; ii++,value+=stepval) {
1002             for (jj=ii; jj<16; jj+=4) {
1003               bap[jj] += *value++;
1004             }
1005           }
1006           goto noinsert2;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column index thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N >= i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i] = col;
1020       bap   = ap +  16*i;
1021       for (ii=0; ii<4; ii++,value+=stepval) {
1022         for (jj=ii; jj<16; jj+=4) {
1023           bap[jj] = *value++;
1024         }
1025       }
1026       noinsert2:;
1027       low = i;
1028     }
1029     ailen[row] = nrow;
1030   }
1031   PetscFunctionReturnVoid();
1032 }
1033 EXTERN_C_END
1034 
1035 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1036 #define matsetvalues4_ MATSETVALUES4
1037 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1038 #define matsetvalues4_ matsetvalues4
1039 #endif
1040 
1041 EXTERN_C_BEGIN
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatSetValues4_"
1044 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1045 {
1046   Mat         A = *AA;
1047   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1048   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1049   PetscInt    *ai=a->i,*ailen=a->ilen;
1050   PetscInt    *aj=a->j,brow,bcol;
1051   PetscInt    ridx,cidx,lastcol = -1;
1052   MatScalar   *ap,value,*aa=a->a,*bap;
1053 
1054   PetscFunctionBegin;
1055   for (k=0; k<m; k++) { /* loop over added rows */
1056     row  = im[k]; brow = row/4;
1057     rp   = aj + ai[brow];
1058     ap   = aa + 16*ai[brow];
1059     nrow = ailen[brow];
1060     low  = 0;
1061     high = nrow;
1062     for (l=0; l<n; l++) { /* loop over added columns */
1063       col = in[l]; bcol = col/4;
1064       ridx = row % 4; cidx = col % 4;
1065       value = v[l + k*n];
1066       if (col <= lastcol) low = 0; else high = nrow;
1067       lastcol = col;
1068       while (high-low > 7) {
1069         t = (low+high)/2;
1070         if (rp[t] > bcol) high = t;
1071         else              low  = t;
1072       }
1073       for (i=low; i<high; i++) {
1074         if (rp[i] > bcol) break;
1075         if (rp[i] == bcol) {
1076           bap  = ap +  16*i + 4*cidx + ridx;
1077           *bap += value;
1078           goto noinsert1;
1079         }
1080       }
1081       N = nrow++ - 1;
1082       high++; /* added new column thus must search to one higher than before */
1083       /* shift up all the later entries in this row */
1084       for (ii=N; ii>=i; ii--) {
1085         rp[ii+1] = rp[ii];
1086         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1087       }
1088       if (N>=i) {
1089         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1090       }
1091       rp[i]                    = bcol;
1092       ap[16*i + 4*cidx + ridx] = value;
1093       noinsert1:;
1094       low = i;
1095     }
1096     ailen[brow] = nrow;
1097   }
1098   PetscFunctionReturnVoid();
1099 }
1100 EXTERN_C_END
1101 
1102 /*
1103      Checks for missing diagonals
1104 */
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1107 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1108 {
1109   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       *diag,*jj = a->j,i;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1115   *missing = PETSC_FALSE;
1116   if (A->rmap->n > 0 && !jj) {
1117     *missing  = PETSC_TRUE;
1118     if (d) *d = 0;
1119     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1120   } else {
1121     diag     = a->diag;
1122     for (i=0; i<a->mbs; i++) {
1123       if (jj[diag[i]] != i) {
1124         *missing  = PETSC_TRUE;
1125         if (d) *d = i;
1126         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1127       }
1128     }
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1135 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1136 {
1137   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1138   PetscErrorCode ierr;
1139   PetscInt       i,j,m = a->mbs;
1140 
1141   PetscFunctionBegin;
1142   if (!a->diag) {
1143     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1144     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1145     a->free_diag = PETSC_TRUE;
1146   }
1147   for (i=0; i<m; i++) {
1148     a->diag[i] = a->i[i+1];
1149     for (j=a->i[i]; j<a->i[i+1]; j++) {
1150       if (a->j[j] == i) {
1151         a->diag[i] = j;
1152         break;
1153       }
1154     }
1155   }
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 
1160 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1164 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1165 {
1166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1167   PetscErrorCode ierr;
1168   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1169   PetscInt       *tia, *tja;
1170 
1171   PetscFunctionBegin;
1172   *nn = n;
1173   if (!ia) PetscFunctionReturn(0);
1174   if (symmetric) {
1175     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1176     nz   = tia[n];
1177   } else {
1178     tia = a->i; tja = a->j;
1179   }
1180 
1181   if (!blockcompressed && bs > 1) {
1182     (*nn) *= bs;
1183     nbs    = bs;
1184     /* malloc & create the natural set of indices */
1185     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1186     if (n) {
1187       (*ia)[0] = 0;
1188       for (j=1; j<bs; j++) {
1189         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1190       }
1191     }
1192 
1193     for (i=1; i<n; i++) {
1194       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1195       for (j=1; j<bs; j++) {
1196         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1197       }
1198     }
1199     if (n) {
1200       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1201     }
1202 
1203     if (ja) {
1204       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1205       cnt = 0;
1206       for (i=0; i<n; i++) {
1207         for (j=0; j<bs; j++) {
1208           for (k=tia[i]; k<tia[i+1]; k++) {
1209             for (l=0; l<bs; l++) {
1210               (*ja)[cnt++] = bs*tja[k] + l;
1211 	    }
1212           }
1213         }
1214       }
1215     }
1216 
1217     n     *= bs;
1218     nz *= bs*bs;
1219     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1220       ierr = PetscFree(tia);CHKERRQ(ierr);
1221       ierr = PetscFree(tja);CHKERRQ(ierr);
1222     }
1223   } else if (oshift == 1) {
1224     if (symmetric) {
1225       PetscInt nz = tia[A->rmap->n/bs];
1226       /*  add 1 to i and j indices */
1227       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1228       *ia = tia;
1229       if (ja) {
1230 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1231         *ja = tja;
1232       }
1233     } else {
1234       PetscInt nz = a->i[A->rmap->n/bs];
1235       /* malloc space and  add 1 to i and j indices */
1236       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1237       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1238       if (ja) {
1239 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1240 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1241       }
1242     }
1243   } else {
1244     *ia = tia;
1245     if (ja) *ja = tja;
1246   }
1247 
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1254 {
1255   PetscErrorCode ierr;
1256 
1257   PetscFunctionBegin;
1258   if (!ia) PetscFunctionReturn(0);
1259   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1260     ierr = PetscFree(*ia);CHKERRQ(ierr);
1261     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1269 {
1270   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1276 #endif
1277   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1278   if (a->row) {
1279     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1280   }
1281   if (a->col) {
1282     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1283   }
1284   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1285   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1286   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1287   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1288   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1289   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1290   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1291   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1292   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1293 
1294   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1295   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1296   ierr = PetscFree(a);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1313 {
1314   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   switch (op) {
1319   case MAT_ROW_ORIENTED:
1320     a->roworiented    = flg;
1321     break;
1322   case MAT_KEEP_NONZERO_PATTERN:
1323     a->keepnonzeropattern = flg;
1324     break;
1325   case MAT_NEW_NONZERO_LOCATIONS:
1326     a->nonew          = (flg ? 0 : 1);
1327     break;
1328   case MAT_NEW_NONZERO_LOCATION_ERR:
1329     a->nonew          = (flg ? -1 : 0);
1330     break;
1331   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1332     a->nonew          = (flg ? -2 : 0);
1333     break;
1334   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1335     a->nounused       = (flg ? -1 : 0);
1336     break;
1337   case MAT_NEW_DIAGONALS:
1338   case MAT_IGNORE_OFF_PROC_ENTRIES:
1339   case MAT_USE_HASH_TABLE:
1340     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1341     break;
1342   case MAT_SYMMETRIC:
1343   case MAT_STRUCTURALLY_SYMMETRIC:
1344   case MAT_HERMITIAN:
1345   case MAT_SYMMETRY_ETERNAL:
1346     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1347     break;
1348   default:
1349     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1357 {
1358   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1361   MatScalar      *aa,*aa_i;
1362   PetscScalar    *v_i;
1363 
1364   PetscFunctionBegin;
1365   bs  = A->rmap->bs;
1366   ai  = a->i;
1367   aj  = a->j;
1368   aa  = a->a;
1369   bs2 = a->bs2;
1370 
1371   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1372 
1373   bn  = row/bs;   /* Block number */
1374   bp  = row % bs; /* Block Position */
1375   M   = ai[bn+1] - ai[bn];
1376   *nz = bs*M;
1377 
1378   if (v) {
1379     *v = 0;
1380     if (*nz) {
1381       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1382       for (i=0; i<M; i++) { /* for each block in the block row */
1383         v_i  = *v + i*bs;
1384         aa_i = aa + bs2*(ai[bn] + i);
1385         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1386       }
1387     }
1388   }
1389 
1390   if (idx) {
1391     *idx = 0;
1392     if (*nz) {
1393       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1394       for (i=0; i<M; i++) { /* for each block in the block row */
1395         idx_i = *idx + i*bs;
1396         itmp  = bs*aj[ai[bn] + i];
1397         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1398       }
1399     }
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1407 {
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1412   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1421 {
1422   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1423   Mat            C;
1424   PetscErrorCode ierr;
1425   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1426   PetscInt       *rows,*cols,bs2=a->bs2;
1427   MatScalar      *array;
1428 
1429   PetscFunctionBegin;
1430   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1431   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1432     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1433     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1434 
1435     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1436     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1437     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1438     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1439     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1440     ierr = PetscFree(col);CHKERRQ(ierr);
1441   } else {
1442     C = *B;
1443   }
1444 
1445   array = a->a;
1446   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1447   for (i=0; i<mbs; i++) {
1448     cols[0] = i*bs;
1449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1450     len = ai[i+1] - ai[i];
1451     for (j=0; j<len; j++) {
1452       rows[0] = (*aj++)*bs;
1453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1454       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1455       array += bs2;
1456     }
1457   }
1458   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1459 
1460   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462 
1463   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1464     *B = C;
1465   } else {
1466     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1478   int            fd;
1479   PetscScalar    *aa;
1480   FILE           *file;
1481 
1482   PetscFunctionBegin;
1483   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1484   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1485   col_lens[0] = MAT_FILE_COOKIE;
1486 
1487   col_lens[1] = A->rmap->N;
1488   col_lens[2] = A->cmap->n;
1489   col_lens[3] = a->nz*bs2;
1490 
1491   /* store lengths of each row and write (including header) to file */
1492   count = 0;
1493   for (i=0; i<a->mbs; i++) {
1494     for (j=0; j<bs; j++) {
1495       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1496     }
1497   }
1498   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1499   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1500 
1501   /* store column indices (zero start index) */
1502   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1503   count = 0;
1504   for (i=0; i<a->mbs; i++) {
1505     for (j=0; j<bs; j++) {
1506       for (k=a->i[i]; k<a->i[i+1]; k++) {
1507         for (l=0; l<bs; l++) {
1508           jj[count++] = bs*a->j[k] + l;
1509         }
1510       }
1511     }
1512   }
1513   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1514   ierr = PetscFree(jj);CHKERRQ(ierr);
1515 
1516   /* store nonzero values */
1517   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1518   count = 0;
1519   for (i=0; i<a->mbs; i++) {
1520     for (j=0; j<bs; j++) {
1521       for (k=a->i[i]; k<a->i[i+1]; k++) {
1522         for (l=0; l<bs; l++) {
1523           aa[count++] = a->a[bs2*k + l*bs + j];
1524         }
1525       }
1526     }
1527   }
1528   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1529   ierr = PetscFree(aa);CHKERRQ(ierr);
1530 
1531   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1532   if (file) {
1533     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1541 {
1542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1543   PetscErrorCode    ierr;
1544   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1545   PetscViewerFormat format;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1552     Mat aij;
1553     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1555     ierr = MatDestroy(aij);CHKERRQ(ierr);
1556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1557      PetscFunctionReturn(0);
1558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             if (a->a[bs2*k + l*bs + j] != 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1578             }
1579 #endif
1580           }
1581         }
1582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1583       }
1584     }
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1586   } else {
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1588     for (i=0; i<a->mbs; i++) {
1589       for (j=0; j<bs; j++) {
1590         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1591         for (k=a->i[i]; k<a->i[i+1]; k++) {
1592           for (l=0; l<bs; l++) {
1593 #if defined(PETSC_USE_COMPLEX)
1594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1595               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1598               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             } else {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             }
1603 #else
1604             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1612   }
1613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1620 {
1621   Mat            A = (Mat) Aa;
1622   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1623   PetscErrorCode ierr;
1624   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1625   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1626   MatScalar      *aa;
1627   PetscViewer    viewer;
1628 
1629   PetscFunctionBegin;
1630 
1631   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1632   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1633 
1634   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1635 
1636   /* loop over matrix elements drawing boxes */
1637   color = PETSC_DRAW_BLUE;
1638   for (i=0,row=0; i<mbs; i++,row+=bs) {
1639     for (j=a->i[i]; j<a->i[i+1]; j++) {
1640       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642       aa = a->a + j*bs2;
1643       for (k=0; k<bs; k++) {
1644         for (l=0; l<bs; l++) {
1645           if (PetscRealPart(*aa++) >=  0.) continue;
1646           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647         }
1648       }
1649     }
1650   }
1651   color = PETSC_DRAW_CYAN;
1652   for (i=0,row=0; i<mbs; i++,row+=bs) {
1653     for (j=a->i[i]; j<a->i[i+1]; j++) {
1654       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656       aa = a->a + j*bs2;
1657       for (k=0; k<bs; k++) {
1658         for (l=0; l<bs; l++) {
1659           if (PetscRealPart(*aa++) != 0.) continue;
1660           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661         }
1662       }
1663     }
1664   }
1665 
1666   color = PETSC_DRAW_RED;
1667   for (i=0,row=0; i<mbs; i++,row+=bs) {
1668     for (j=a->i[i]; j<a->i[i+1]; j++) {
1669       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671       aa = a->a + j*bs2;
1672       for (k=0; k<bs; k++) {
1673         for (l=0; l<bs; l++) {
1674           if (PetscRealPart(*aa++) <= 0.) continue;
1675           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1676         }
1677       }
1678     }
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1686 {
1687   PetscErrorCode ierr;
1688   PetscReal      xl,yl,xr,yr,w,h;
1689   PetscDraw      draw;
1690   PetscTruth     isnull;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1695   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1696 
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1698   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699   xr += w;    yr += h;  xl = -w;     yl = -h;
1700   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1701   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatView_SeqBAIJ"
1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709 {
1710   PetscErrorCode ierr;
1711   PetscTruth     iascii,isbinary,isdraw;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1717   if (iascii){
1718     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1719   } else if (isbinary) {
1720     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1721   } else if (isdraw) {
1722     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1723   } else {
1724     Mat B;
1725     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1726     ierr = MatView(B,viewer);CHKERRQ(ierr);
1727     ierr = MatDestroy(B);CHKERRQ(ierr);
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1736 {
1737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1738   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1739   PetscInt    *ai = a->i,*ailen = a->ilen;
1740   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1741   MatScalar   *ap,*aa = a->a;
1742 
1743   PetscFunctionBegin;
1744   for (k=0; k<m; k++) { /* loop over rows */
1745     row  = im[k]; brow = row/bs;
1746     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1747     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1748     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1749     nrow = ailen[brow];
1750     for (l=0; l<n; l++) { /* loop over columns */
1751       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1752       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1753       col  = in[l] ;
1754       bcol = col/bs;
1755       cidx = col%bs;
1756       ridx = row%bs;
1757       high = nrow;
1758       low  = 0; /* assume unsorted */
1759       while (high-low > 5) {
1760         t = (low+high)/2;
1761         if (rp[t] > bcol) high = t;
1762         else             low  = t;
1763       }
1764       for (i=low; i<high; i++) {
1765         if (rp[i] > bcol) break;
1766         if (rp[i] == bcol) {
1767           *v++ = ap[bs2*i+bs*cidx+ridx];
1768           goto finished;
1769         }
1770       }
1771       *v++ = 0.0;
1772       finished:;
1773     }
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #define CHUNKSIZE 10
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1782 {
1783   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1784   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1785   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1786   PetscErrorCode    ierr;
1787   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1788   PetscTruth        roworiented=a->roworiented;
1789   const PetscScalar *value = v;
1790   MatScalar         *ap,*aa = a->a,*bap;
1791 
1792   PetscFunctionBegin;
1793   if (roworiented) {
1794     stepval = (n-1)*bs;
1795   } else {
1796     stepval = (m-1)*bs;
1797   }
1798   for (k=0; k<m; k++) { /* loop over added rows */
1799     row  = im[k];
1800     if (row < 0) continue;
1801 #if defined(PETSC_USE_DEBUG)
1802     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1803 #endif
1804     rp   = aj + ai[row];
1805     ap   = aa + bs2*ai[row];
1806     rmax = imax[row];
1807     nrow = ailen[row];
1808     low  = 0;
1809     high = nrow;
1810     for (l=0; l<n; l++) { /* loop over added columns */
1811       if (in[l] < 0) continue;
1812 #if defined(PETSC_USE_DEBUG)
1813       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1814 #endif
1815       col = in[l];
1816       if (roworiented) {
1817         value = v + k*(stepval+bs)*bs + l*bs;
1818       } else {
1819         value = v + l*(stepval+bs)*bs + k*bs;
1820       }
1821       if (col <= lastcol) low = 0; else high = nrow;
1822       lastcol = col;
1823       while (high-low > 7) {
1824         t = (low+high)/2;
1825         if (rp[t] > col) high = t;
1826         else             low  = t;
1827       }
1828       for (i=low; i<high; i++) {
1829         if (rp[i] > col) break;
1830         if (rp[i] == col) {
1831           bap  = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   *bap++ += *value++;
1851                 }
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   *bap++  = *value++;
1857                 }
1858               }
1859             }
1860           }
1861           goto noinsert2;
1862         }
1863       }
1864       if (nonew == 1) goto noinsert2;
1865       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1866       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       for (ii=N; ii>=i; ii--) {
1870         rp[ii+1] = rp[ii];
1871         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       if (N >= i) {
1874         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876       rp[i] = col;
1877       bap   = ap +  bs2*i;
1878       if (roworiented) {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=ii; jj<bs2; jj+=bs) {
1881             bap[jj] = *value++;
1882           }
1883         }
1884       } else {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=0; jj<bs; jj++) {
1887             *bap++  = *value++;
1888           }
1889         }
1890       }
1891       noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax   = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1921       N = ailen[i];
1922       for (j=0; j<N; j++) {
1923         ip[j-fshift] = ip[j];
1924         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1925       }
1926     }
1927     ai[i] = ai[i-1] + ailen[i-1];
1928   }
1929   if (mbs) {
1930     fshift += imax[mbs-1] - ailen[mbs-1];
1931     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1932   }
1933   /* reset ilen and imax for each row */
1934   for (i=0; i<mbs; i++) {
1935     ailen[i] = imax[i] = ai[i+1] - ai[i];
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) {
1947     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   }
1949   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1952   a->reallocs          = 0;
1953   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1954 
1955   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1956   if (a->compressedrow.use){
1957     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   }
1959 
1960   A->same_nonzero = PETSC_TRUE;
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 /*
1965    This function returns an array of flags which indicate the locations of contiguous
1966    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1967    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1968    Assume: sizes should be long enough to hold all the values.
1969 */
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1973 {
1974   PetscInt   i,j,k,row;
1975   PetscTruth flg;
1976 
1977   PetscFunctionBegin;
1978   for (i=0,j=0; i<n; j++) {
1979     row = idx[i];
1980     if (row%bs!=0) { /* Not the begining of a block */
1981       sizes[j] = 1;
1982       i++;
1983     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1984       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1985       i++;
1986     } else { /* Begining of the block, so check if the complete block exists */
1987       flg = PETSC_TRUE;
1988       for (k=1; k<bs; k++) {
1989         if (row+k != idx[i+k]) { /* break in the block */
1990           flg = PETSC_FALSE;
1991           break;
1992         }
1993       }
1994       if (flg) { /* No break in the bs */
1995         sizes[j] = bs;
1996         i+= bs;
1997       } else {
1998         sizes[j] = 1;
1999         i++;
2000       }
2001     }
2002   }
2003   *bs_max = j;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2010 {
2011   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013   PetscInt       i,j,k,count,*rows;
2014   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2015   PetscScalar    zero = 0.0;
2016   MatScalar      *aa;
2017 
2018   PetscFunctionBegin;
2019   /* Make a copy of the IS and  sort it */
2020   /* allocate memory for rows,sizes */
2021   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2022 
2023   /* copy IS values to rows, and sort them */
2024   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2025   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2026   if (baij->keepnonzeropattern) {
2027     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2028     bs_max = is_n;
2029     A->same_nonzero = PETSC_TRUE;
2030   } else {
2031     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2032     A->same_nonzero = PETSC_FALSE;
2033   }
2034 
2035   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2036     row   = rows[j];
2037     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2038     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2039     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2040     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2041       if (diag != 0.0) {
2042         if (baij->ilen[row/bs] > 0) {
2043           baij->ilen[row/bs]       = 1;
2044           baij->j[baij->i[row/bs]] = row/bs;
2045           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2046         }
2047         /* Now insert all the diagonal values for this bs */
2048         for (k=0; k<bs; k++) {
2049           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2050         }
2051       } else { /* (diag == 0.0) */
2052         baij->ilen[row/bs] = 0;
2053       } /* end (diag == 0.0) */
2054     } else { /* (sizes[i] != bs) */
2055 #if defined (PETSC_USE_DEBUG)
2056       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2057 #endif
2058       for (k=0; k<count; k++) {
2059         aa[0] =  zero;
2060         aa    += bs;
2061       }
2062       if (diag != 0.0) {
2063         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2064       }
2065     }
2066   }
2067 
2068   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2075 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2076 {
2077   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2078   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2079   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2080   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2081   PetscErrorCode ierr;
2082   PetscInt       ridx,cidx,bs2=a->bs2;
2083   PetscTruth     roworiented=a->roworiented;
2084   MatScalar      *ap,value,*aa=a->a,*bap;
2085 
2086   PetscFunctionBegin;
2087   if (v) PetscValidScalarPointer(v,6);
2088   for (k=0; k<m; k++) { /* loop over added rows */
2089     row  = im[k];
2090     brow = row/bs;
2091     if (row < 0) continue;
2092 #if defined(PETSC_USE_DEBUG)
2093     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2094 #endif
2095     rp   = aj + ai[brow];
2096     ap   = aa + bs2*ai[brow];
2097     rmax = imax[brow];
2098     nrow = ailen[brow];
2099     low  = 0;
2100     high = nrow;
2101     for (l=0; l<n; l++) { /* loop over added columns */
2102       if (in[l] < 0) continue;
2103 #if defined(PETSC_USE_DEBUG)
2104       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2105 #endif
2106       col = in[l]; bcol = col/bs;
2107       ridx = row % bs; cidx = col % bs;
2108       if (roworiented) {
2109         value = v[l + k*n];
2110       } else {
2111         value = v[k + l*m];
2112       }
2113       if (col <= lastcol) low = 0; else high = nrow;
2114       lastcol = col;
2115       while (high-low > 7) {
2116         t = (low+high)/2;
2117         if (rp[t] > bcol) high = t;
2118         else              low  = t;
2119       }
2120       for (i=low; i<high; i++) {
2121         if (rp[i] > bcol) break;
2122         if (rp[i] == bcol) {
2123           bap  = ap +  bs2*i + bs*cidx + ridx;
2124           if (is == ADD_VALUES) *bap += value;
2125           else                  *bap  = value;
2126           goto noinsert1;
2127         }
2128       }
2129       if (nonew == 1) goto noinsert1;
2130       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2131       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2132       N = nrow++ - 1; high++;
2133       /* shift up all the later entries in this row */
2134       for (ii=N; ii>=i; ii--) {
2135         rp[ii+1] = rp[ii];
2136         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2137       }
2138       if (N>=i) {
2139         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2140       }
2141       rp[i]                      = bcol;
2142       ap[bs2*i + bs*cidx + ridx] = value;
2143       a->nz++;
2144       noinsert1:;
2145       low = i;
2146     }
2147     ailen[brow] = nrow;
2148   }
2149   A->same_nonzero = PETSC_FALSE;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2155 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2156 {
2157   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2158   Mat            outA;
2159   PetscErrorCode ierr;
2160   PetscTruth     row_identity,col_identity;
2161 
2162   PetscFunctionBegin;
2163   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2164   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2165   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2166   if (!row_identity || !col_identity) {
2167     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2168   }
2169 
2170   outA          = inA;
2171   inA->factor   = MAT_FACTOR_LU;
2172 
2173   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2174 
2175   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2176   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2177   a->row = row;
2178   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2179   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2180   a->col = col;
2181 
2182   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2183   if (a->icol) {
2184     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2185   }
2186   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2187   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2188 
2189   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2190   if (!a->solve_work) {
2191     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2192     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2193   }
2194   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2195 
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2203 {
2204   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2205   PetscInt    i,nz,mbs;
2206 
2207   PetscFunctionBegin;
2208   nz  = baij->maxnz/baij->bs2;
2209   mbs = baij->mbs;
2210   for (i=0; i<nz; i++) {
2211     baij->j[i] = indices[i];
2212   }
2213   baij->nz = nz;
2214   for (i=0; i<mbs; i++) {
2215     baij->ilen[i] = baij->imax[i];
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 EXTERN_C_END
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2223 /*@
2224     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2225        in the matrix.
2226 
2227   Input Parameters:
2228 +  mat - the SeqBAIJ matrix
2229 -  indices - the column indices
2230 
2231   Level: advanced
2232 
2233   Notes:
2234     This can be called if you have precomputed the nonzero structure of the
2235   matrix and want to provide it to the matrix object to improve the performance
2236   of the MatSetValues() operation.
2237 
2238     You MUST have set the correct numbers of nonzeros per row in the call to
2239   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2240 
2241     MUST be called before any calls to MatSetValues();
2242 
2243 @*/
2244 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2245 {
2246   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2247 
2248   PetscFunctionBegin;
2249   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2250   PetscValidPointer(indices,2);
2251   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2252   if (f) {
2253     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2254   } else {
2255     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2262 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2263 {
2264   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2265   PetscErrorCode ierr;
2266   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2267   PetscReal      atmp;
2268   PetscScalar    *x,zero = 0.0;
2269   MatScalar      *aa;
2270   PetscInt       ncols,brow,krow,kcol;
2271 
2272   PetscFunctionBegin;
2273   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2274   bs   = A->rmap->bs;
2275   aa   = a->a;
2276   ai   = a->i;
2277   aj   = a->j;
2278   mbs  = a->mbs;
2279 
2280   ierr = VecSet(v,zero);CHKERRQ(ierr);
2281   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2282   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2283   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2284   for (i=0; i<mbs; i++) {
2285     ncols = ai[1] - ai[0]; ai++;
2286     brow  = bs*i;
2287     for (j=0; j<ncols; j++){
2288       for (kcol=0; kcol<bs; kcol++){
2289         for (krow=0; krow<bs; krow++){
2290           atmp = PetscAbsScalar(*aa);aa++;
2291           row = brow + krow;    /* row index */
2292           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2293           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2294         }
2295       }
2296       aj++;
2297     }
2298   }
2299   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatCopy_SeqBAIJ"
2305 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2306 {
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   /* If the two matrices have the same copy implementation, use fast copy. */
2311   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2312     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2313     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2314 
2315     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2316       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2317     }
2318     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2319   } else {
2320     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2321   }
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2327 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2328 {
2329   PetscErrorCode ierr;
2330 
2331   PetscFunctionBegin;
2332   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2338 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2339 {
2340   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2341   PetscFunctionBegin;
2342   *array = a->a;
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2348 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2349 {
2350   PetscFunctionBegin;
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #include "petscblaslapack.h"
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2357 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2358 {
2359   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2360   PetscErrorCode ierr;
2361   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2362   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2363 
2364   PetscFunctionBegin;
2365   if (str == SAME_NONZERO_PATTERN) {
2366     PetscScalar alpha = a;
2367     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2368   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2369     if (y->xtoy && y->XtoY != X) {
2370       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2371       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2372     }
2373     if (!y->xtoy) { /* get xtoy */
2374       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2375       y->XtoY = X;
2376       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2377     }
2378     bs2 = bs*bs;
2379     for (i=0; i<x->nz; i++) {
2380       j = 0;
2381       while (j < bs2){
2382         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2383         j++;
2384       }
2385     }
2386     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2387   } else {
2388     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2395 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2396 {
2397   PetscInt rbs,cbs;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
2401   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2402   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2403   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2404   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2410 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2411 {
2412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2413   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2414   MatScalar      *aa = a->a;
2415 
2416   PetscFunctionBegin;
2417   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2423 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2426   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2427   MatScalar      *aa = a->a;
2428 
2429   PetscFunctionBegin;
2430   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2438 /*
2439     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2440 */
2441 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2442 {
2443   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2444   PetscErrorCode ierr;
2445   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2446   PetscInt       nz = a->i[m],row,*jj,mr,col;
2447 
2448   PetscFunctionBegin;
2449   *nn = n;
2450   if (!ia) PetscFunctionReturn(0);
2451   if (symmetric) {
2452     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2453   } else {
2454     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2455     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2456     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2457     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2458     jj = a->j;
2459     for (i=0; i<nz; i++) {
2460       collengths[jj[i]]++;
2461     }
2462     cia[0] = oshift;
2463     for (i=0; i<n; i++) {
2464       cia[i+1] = cia[i] + collengths[i];
2465     }
2466     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2467     jj   = a->j;
2468     for (row=0; row<m; row++) {
2469       mr = a->i[row+1] - a->i[row];
2470       for (i=0; i<mr; i++) {
2471         col = *jj++;
2472         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2473       }
2474     }
2475     ierr = PetscFree(collengths);CHKERRQ(ierr);
2476     *ia = cia; *ja = cja;
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2483 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2484 {
2485   PetscErrorCode ierr;
2486 
2487   PetscFunctionBegin;
2488   if (!ia) PetscFunctionReturn(0);
2489   ierr = PetscFree(*ia);CHKERRQ(ierr);
2490   ierr = PetscFree(*ja);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 #undef __FUNCT__
2495 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2496 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2497 {
2498   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2499   PetscErrorCode ierr;
2500   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2501   PetscScalar    dx,*y,*xx,*w3_array;
2502   PetscScalar    *vscale_array;
2503   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2504   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2505   void           *fctx = coloring->fctx;
2506   PetscTruth     flg = PETSC_FALSE;
2507   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2508   Vec            x1_tmp;
2509 
2510   PetscFunctionBegin;
2511   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2512   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2513   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2514   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2515 
2516   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2517   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2518   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2519   if (flg) {
2520     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2521   } else {
2522     PetscTruth assembled;
2523     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2524     if (assembled) {
2525       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2526     }
2527   }
2528 
2529   x1_tmp = x1;
2530   if (!coloring->vscale){
2531     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2532   }
2533 
2534   /*
2535     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2536     coloring->F for the coarser grids from the finest
2537   */
2538   if (coloring->F) {
2539     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2540     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2541     if (m1 != m2) {
2542       coloring->F = 0;
2543       }
2544     }
2545 
2546   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2547     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2548   }
2549   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2550 
2551   /* Set w1 = F(x1) */
2552   if (coloring->F) {
2553     w1          = coloring->F; /* use already computed value of function */
2554     coloring->F = 0;
2555   } else {
2556     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2557     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2558     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2559   }
2560 
2561   if (!coloring->w3) {
2562     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2563     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2564   }
2565   w3 = coloring->w3;
2566 
2567     CHKMEMQ;
2568     /* Compute all the local scale factors, including ghost points */
2569   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2570   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2571   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2572   if (ctype == IS_COLORING_GHOSTED){
2573     col_start = 0; col_end = N;
2574   } else if (ctype == IS_COLORING_GLOBAL){
2575     xx = xx - start;
2576     vscale_array = vscale_array - start;
2577     col_start = start; col_end = N + start;
2578   }    CHKMEMQ;
2579   for (col=col_start; col<col_end; col++){
2580     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2581     if (coloring->htype[0] == 'w') {
2582       dx = 1.0 + unorm;
2583     } else {
2584       dx  = xx[col];
2585     }
2586     if (dx == 0.0) dx = 1.0;
2587 #if !defined(PETSC_USE_COMPLEX)
2588     if (dx < umin && dx >= 0.0)      dx = umin;
2589     else if (dx < 0.0 && dx > -umin) dx = -umin;
2590 #else
2591     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2592     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2593 #endif
2594     dx               *= epsilon;
2595     vscale_array[col] = 1.0/dx;
2596   }     CHKMEMQ;
2597   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2598   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2599   if (ctype == IS_COLORING_GLOBAL){
2600     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2601     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2602   }
2603         CHKMEMQ;
2604   if (coloring->vscaleforrow) {
2605     vscaleforrow = coloring->vscaleforrow;
2606   } else {
2607     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2608   }
2609 
2610 
2611   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2612   /*
2613     Loop over each color
2614   */
2615   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2616   for (k=0; k<coloring->ncolors; k++) {
2617     coloring->currentcolor = k;
2618     for (i=0; i<bs; i++) {
2619       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2620       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2621       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2622       /*
2623 	Loop over each column associated with color
2624 	adding the perturbation to the vector w3.
2625       */
2626       for (l=0; l<coloring->ncolumns[k]; l++) {
2627 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2628 	if (coloring->htype[0] == 'w') {
2629 	  dx = 1.0 + unorm;
2630 	} else {
2631 	  dx  = xx[col];
2632 	}
2633 	if (dx == 0.0) dx = 1.0;
2634 #if !defined(PETSC_USE_COMPLEX)
2635 	if (dx < umin && dx >= 0.0)      dx = umin;
2636 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2637 #else
2638 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2639 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2640 #endif
2641 	dx            *= epsilon;
2642 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2643 	w3_array[col] += dx;
2644       }
2645       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2646       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2647 
2648       /*
2649 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2650 	w2 = F(x1 + dx) - F(x1)
2651       */
2652       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2653       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2654       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2655       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2656 
2657       /*
2658 	Loop over rows of vector, putting results into Jacobian matrix
2659       */
2660       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2661       for (l=0; l<coloring->nrows[k]; l++) {
2662 	row    = bs*coloring->rows[k][l];             /* local row index */
2663 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2664         for (j=0; j<bs; j++) {
2665   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2666           srows[j]  = row + start + j;
2667         }
2668 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2669       }
2670       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2671     }
2672   } /* endof for each color */
2673   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2674   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2675   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2676   ierr = PetscFree(srows);CHKERRQ(ierr);
2677 
2678   coloring->currentcolor = -1;
2679   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2681   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2682   PetscFunctionReturn(0);
2683 }
2684 
2685 /* -------------------------------------------------------------------*/
2686 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2687        MatGetRow_SeqBAIJ,
2688        MatRestoreRow_SeqBAIJ,
2689        MatMult_SeqBAIJ_N,
2690 /* 4*/ MatMultAdd_SeqBAIJ_N,
2691        MatMultTranspose_SeqBAIJ,
2692        MatMultTransposeAdd_SeqBAIJ,
2693        0,
2694        0,
2695        0,
2696 /*10*/ 0,
2697        MatLUFactor_SeqBAIJ,
2698        0,
2699        0,
2700        MatTranspose_SeqBAIJ,
2701 /*15*/ MatGetInfo_SeqBAIJ,
2702        MatEqual_SeqBAIJ,
2703        MatGetDiagonal_SeqBAIJ,
2704        MatDiagonalScale_SeqBAIJ,
2705        MatNorm_SeqBAIJ,
2706 /*20*/ 0,
2707        MatAssemblyEnd_SeqBAIJ,
2708        MatSetOption_SeqBAIJ,
2709        MatZeroEntries_SeqBAIJ,
2710 /*24*/ MatZeroRows_SeqBAIJ,
2711        0,
2712        0,
2713        0,
2714        0,
2715 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2716        0,
2717        0,
2718        MatGetArray_SeqBAIJ,
2719        MatRestoreArray_SeqBAIJ,
2720 /*34*/ MatDuplicate_SeqBAIJ,
2721        0,
2722        0,
2723        MatILUFactor_SeqBAIJ,
2724        0,
2725 /*39*/ MatAXPY_SeqBAIJ,
2726        MatGetSubMatrices_SeqBAIJ,
2727        MatIncreaseOverlap_SeqBAIJ,
2728        MatGetValues_SeqBAIJ,
2729        MatCopy_SeqBAIJ,
2730 /*44*/ 0,
2731        MatScale_SeqBAIJ,
2732        0,
2733        0,
2734        0,
2735 /*49*/ MatSetBlockSize_SeqBAIJ,
2736        MatGetRowIJ_SeqBAIJ,
2737        MatRestoreRowIJ_SeqBAIJ,
2738        MatGetColumnIJ_SeqBAIJ,
2739        MatRestoreColumnIJ_SeqBAIJ,
2740 /*54*/ MatFDColoringCreate_SeqAIJ,
2741        0,
2742        0,
2743        0,
2744        MatSetValuesBlocked_SeqBAIJ,
2745 /*59*/ MatGetSubMatrix_SeqBAIJ,
2746        MatDestroy_SeqBAIJ,
2747        MatView_SeqBAIJ,
2748        0,
2749        0,
2750 /*64*/ 0,
2751        0,
2752        0,
2753        0,
2754        0,
2755 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2756        0,
2757        MatConvert_Basic,
2758        0,
2759        0,
2760 /*74*/ 0,
2761        MatFDColoringApply_BAIJ,
2762        0,
2763        0,
2764        0,
2765 /*79*/ 0,
2766        0,
2767        0,
2768        0,
2769        MatLoad_SeqBAIJ,
2770 /*84*/ 0,
2771        0,
2772        0,
2773        0,
2774        0,
2775 /*89*/ 0,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*94*/ 0,
2781        0,
2782        0,
2783        0,
2784        0,
2785 /*99*/0,
2786        0,
2787        0,
2788        0,
2789        0,
2790 /*104*/0,
2791        MatRealPart_SeqBAIJ,
2792        MatImaginaryPart_SeqBAIJ,
2793        0,
2794        0,
2795 /*109*/0,
2796        0,
2797        0,
2798        0,
2799        MatMissingDiagonal_SeqBAIJ,
2800 /*114*/0,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*119*/0,
2806        0,
2807        MatMultHermitianTranspose_SeqBAIJ,
2808        MatMultHermitianTransposeAdd_SeqBAIJ
2809 };
2810 
2811 EXTERN_C_BEGIN
2812 #undef __FUNCT__
2813 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2814 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2815 {
2816   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2817   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2818   PetscErrorCode ierr;
2819 
2820   PetscFunctionBegin;
2821   if (aij->nonew != 1) {
2822     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2823   }
2824 
2825   /* allocate space for values if not already there */
2826   if (!aij->saved_values) {
2827     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2828     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2829   }
2830 
2831   /* copy values over */
2832   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 EXTERN_C_END
2836 
2837 EXTERN_C_BEGIN
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2840 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2841 {
2842   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2843   PetscErrorCode ierr;
2844   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2845 
2846   PetscFunctionBegin;
2847   if (aij->nonew != 1) {
2848     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2849   }
2850   if (!aij->saved_values) {
2851     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2852   }
2853 
2854   /* copy values over */
2855   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 EXTERN_C_END
2859 
2860 EXTERN_C_BEGIN
2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2862 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2863 EXTERN_C_END
2864 
2865 EXTERN_C_BEGIN
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2868 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2869 {
2870   Mat_SeqBAIJ    *b;
2871   PetscErrorCode ierr;
2872   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2873   PetscTruth     flg,skipallocation = PETSC_FALSE;
2874   PetscInt       ver;
2875 
2876   PetscFunctionBegin;
2877 
2878   ierr = PetscOptionsGetInt(PETSC_NULL,"-ver",&ver,PETSC_NULL);CHKERRQ(ierr);
2879   if (nz == MAT_SKIP_ALLOCATION) {
2880     skipallocation = PETSC_TRUE;
2881     nz             = 0;
2882   }
2883 
2884   if (bs < 0) {
2885     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2886       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2887     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2888     bs   = PetscAbs(bs);
2889   }
2890   if (nnz && newbs != bs) {
2891     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2892   }
2893   bs   = newbs;
2894 
2895   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2896   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2897   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2898   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2899 
2900   B->preallocated = PETSC_TRUE;
2901 
2902   mbs  = B->rmap->n/bs;
2903   nbs  = B->cmap->n/bs;
2904   bs2  = bs*bs;
2905 
2906   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2907     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2908   }
2909 
2910   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2911   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2912   if (nnz) {
2913     for (i=0; i<mbs; i++) {
2914       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2915       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2916     }
2917   }
2918 
2919   b       = (Mat_SeqBAIJ*)B->data;
2920   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2921     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2922   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2923 
2924   if (!flg) {
2925     switch (bs) {
2926     case 1:
2927       B->ops->mult            = MatMult_SeqBAIJ_1;
2928       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2929       B->ops->sor             = MatSOR_SeqBAIJ_1;
2930       break;
2931     case 2:
2932       B->ops->mult            = MatMult_SeqBAIJ_2;
2933       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2934       B->ops->sor             = MatSOR_SeqBAIJ_2;
2935       break;
2936     case 3:
2937       B->ops->mult            = MatMult_SeqBAIJ_3;
2938       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2939       B->ops->sor             = MatSOR_SeqBAIJ_3;
2940       break;
2941     case 4:
2942       B->ops->mult            = MatMult_SeqBAIJ_4;
2943       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2944       B->ops->sor             = MatSOR_SeqBAIJ_4;
2945       break;
2946     case 5:
2947       B->ops->mult            = MatMult_SeqBAIJ_5;
2948       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2949       B->ops->sor             = MatSOR_SeqBAIJ_5;
2950       break;
2951     case 6:
2952       B->ops->mult            = MatMult_SeqBAIJ_6;
2953       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2954       B->ops->sor             = MatSOR_SeqBAIJ_6;
2955       break;
2956     case 7:
2957       B->ops->mult            = MatMult_SeqBAIJ_7;
2958       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2959       B->ops->sor             = MatSOR_SeqBAIJ_7;
2960       break;
2961     case 15:
2962       if (ver == 1) B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2963       else if (ver == 2) B->ops->mult = MatMult_SeqBAIJ_15_ver2;
2964       else if (ver == 3) B->ops->mult = MatMult_SeqBAIJ_15_ver3;
2965       else if (ver == 4) B->ops->mult = MatMult_SeqBAIJ_15_ver4;
2966       else B->ops->mult = MatMult_SeqBAIJ_N;
2967       break;
2968     default:
2969       B->ops->mult            = MatMult_SeqBAIJ_N;
2970       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2971       break;
2972     }
2973   }
2974   B->rmap->bs      = bs;
2975   b->mbs     = mbs;
2976   b->nbs     = nbs;
2977   if (!skipallocation) {
2978     if (!b->imax) {
2979       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2980       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2981       b->free_imax_ilen = PETSC_TRUE;
2982     }
2983     /* b->ilen will count nonzeros in each block row so far. */
2984     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2985     if (!nnz) {
2986       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2987       else if (nz <= 0)        nz = 1;
2988       for (i=0; i<mbs; i++) b->imax[i] = nz;
2989       nz = nz*mbs;
2990     } else {
2991       nz = 0;
2992       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2993     }
2994 
2995     /* allocate the matrix space */
2996     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2997     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2998     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2999     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3000     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3001     b->singlemalloc = PETSC_TRUE;
3002     b->i[0] = 0;
3003     for (i=1; i<mbs+1; i++) {
3004       b->i[i] = b->i[i-1] + b->imax[i-1];
3005     }
3006     b->free_a     = PETSC_TRUE;
3007     b->free_ij    = PETSC_TRUE;
3008   } else {
3009     b->free_a     = PETSC_FALSE;
3010     b->free_ij    = PETSC_FALSE;
3011   }
3012 
3013   B->rmap->bs          = bs;
3014   b->bs2              = bs2;
3015   b->mbs              = mbs;
3016   b->nz               = 0;
3017   b->maxnz            = nz*bs2;
3018   B->info.nz_unneeded = (PetscReal)b->maxnz;
3019   PetscFunctionReturn(0);
3020 }
3021 EXTERN_C_END
3022 
3023 EXTERN_C_BEGIN
3024 #undef __FUNCT__
3025 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3026 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3027 {
3028   PetscInt       i,m,nz,nz_max=0,*nnz;
3029   PetscScalar    *values=0;
3030   PetscErrorCode ierr;
3031 
3032   PetscFunctionBegin;
3033 
3034   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3035 
3036   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3037   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3038   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3039   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3040   m = B->rmap->n/bs;
3041 
3042   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3043   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3044   for(i=0; i<m; i++) {
3045     nz = ii[i+1]- ii[i];
3046     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3047     nz_max = PetscMax(nz_max, nz);
3048     nnz[i] = nz;
3049   }
3050   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3051   ierr = PetscFree(nnz);CHKERRQ(ierr);
3052 
3053   values = (PetscScalar*)V;
3054   if (!values) {
3055     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3056     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3057   }
3058   for (i=0; i<m; i++) {
3059     PetscInt          ncols  = ii[i+1] - ii[i];
3060     const PetscInt    *icols = jj + ii[i];
3061     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3062     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3063   }
3064   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3065   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067 
3068   PetscFunctionReturn(0);
3069 }
3070 EXTERN_C_END
3071 
3072 
3073 EXTERN_C_BEGIN
3074 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3075 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3076 EXTERN_C_END
3077 
3078 /*MC
3079    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3080    block sparse compressed row format.
3081 
3082    Options Database Keys:
3083 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3084 
3085   Level: beginner
3086 
3087 .seealso: MatCreateSeqBAIJ()
3088 M*/
3089 
3090 
3091 EXTERN_C_BEGIN
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatCreate_SeqBAIJ"
3094 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3095 {
3096   PetscErrorCode ierr;
3097   PetscMPIInt    size;
3098   Mat_SeqBAIJ    *b;
3099 
3100   PetscFunctionBegin;
3101   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3102   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3103 
3104   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3105   B->data = (void*)b;
3106   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3107   B->mapping               = 0;
3108   b->row                   = 0;
3109   b->col                   = 0;
3110   b->icol                  = 0;
3111   b->reallocs              = 0;
3112   b->saved_values          = 0;
3113 
3114   b->roworiented           = PETSC_TRUE;
3115   b->nonew                 = 0;
3116   b->diag                  = 0;
3117   b->solve_work            = 0;
3118   b->mult_work             = 0;
3119   B->spptr                 = 0;
3120   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3121   b->keepnonzeropattern    = PETSC_FALSE;
3122   b->xtoy                  = 0;
3123   b->XtoY                  = 0;
3124   b->compressedrow.use     = PETSC_FALSE;
3125   b->compressedrow.nrows   = 0;
3126   b->compressedrow.i       = PETSC_NULL;
3127   b->compressedrow.rindex  = PETSC_NULL;
3128   b->compressedrow.checked = PETSC_FALSE;
3129   B->same_nonzero          = PETSC_FALSE;
3130 
3131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3132                                      "MatGetFactorAvailable_seqbaij_petsc",
3133                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3135                                      "MatGetFactor_seqbaij_petsc",
3136                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3137   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3138                                      "MatInvertBlockDiagonal_SeqBAIJ",
3139                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3140   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3141                                      "MatStoreValues_SeqBAIJ",
3142                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3144                                      "MatRetrieveValues_SeqBAIJ",
3145                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3146   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3147                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3148                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3150                                      "MatConvert_SeqBAIJ_SeqAIJ",
3151                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3152   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3153                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3154                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3155   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3156                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3157                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3158   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3159                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3160                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3161   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 EXTERN_C_END
3165 
3166 #undef __FUNCT__
3167 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3168 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3169 {
3170   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3171   PetscErrorCode ierr;
3172   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3173 
3174   PetscFunctionBegin;
3175   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3176 
3177   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3178     c->imax = a->imax;
3179     c->ilen = a->ilen;
3180     c->free_imax_ilen = PETSC_FALSE;
3181   } else {
3182     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3183     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3184     for (i=0; i<mbs; i++) {
3185       c->imax[i] = a->imax[i];
3186       c->ilen[i] = a->ilen[i];
3187     }
3188     c->free_imax_ilen = PETSC_TRUE;
3189   }
3190 
3191   /* allocate the matrix space */
3192   if (mallocmatspace){
3193     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3194       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3195       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3196       c->singlemalloc = PETSC_FALSE;
3197       c->free_ij      = PETSC_FALSE;
3198       c->i            = a->i;
3199       c->j            = a->j;
3200       c->parent       = A;
3201       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3202       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3203       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3204     } else {
3205       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3206       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3207       c->singlemalloc = PETSC_TRUE;
3208       c->free_ij      = PETSC_TRUE;
3209       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3210       if (mbs > 0) {
3211 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3212 	if (cpvalues == MAT_COPY_VALUES) {
3213 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3214 	} else {
3215 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3216 	}
3217       }
3218     }
3219   }
3220 
3221   c->roworiented = a->roworiented;
3222   c->nonew       = a->nonew;
3223   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3224   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3225   c->bs2         = a->bs2;
3226   c->mbs         = a->mbs;
3227   c->nbs         = a->nbs;
3228 
3229   if (a->diag) {
3230     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3231       c->diag      = a->diag;
3232       c->free_diag = PETSC_FALSE;
3233     } else {
3234       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3235       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3236       for (i=0; i<mbs; i++) {
3237         c->diag[i] = a->diag[i];
3238       }
3239       c->free_diag = PETSC_TRUE;
3240     }
3241   } else c->diag        = 0;
3242   c->nz                 = a->nz;
3243   c->maxnz              = a->maxnz;
3244   c->solve_work         = 0;
3245   c->mult_work          = 0;
3246   c->free_a             = PETSC_TRUE;
3247   c->free_ij            = PETSC_TRUE;
3248   C->preallocated       = PETSC_TRUE;
3249   C->assembled          = PETSC_TRUE;
3250 
3251   c->compressedrow.use     = a->compressedrow.use;
3252   c->compressedrow.nrows   = a->compressedrow.nrows;
3253   c->compressedrow.checked = a->compressedrow.checked;
3254   if (a->compressedrow.checked && a->compressedrow.use){
3255     i = a->compressedrow.nrows;
3256     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3257     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3258     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3259     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3260   } else {
3261     c->compressedrow.use    = PETSC_FALSE;
3262     c->compressedrow.i      = PETSC_NULL;
3263     c->compressedrow.rindex = PETSC_NULL;
3264   }
3265   C->same_nonzero = A->same_nonzero;
3266   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3267   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3268   PetscFunctionReturn(0);
3269 }
3270 
3271 #undef __FUNCT__
3272 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3273 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3274 {
3275     PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3279   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3280   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3281   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatLoad_SeqBAIJ"
3287 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3288 {
3289   Mat_SeqBAIJ    *a;
3290   Mat            B;
3291   PetscErrorCode ierr;
3292   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3293   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3294   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3295   PetscInt       *masked,nmask,tmp,bs2,ishift;
3296   PetscMPIInt    size;
3297   int            fd;
3298   PetscScalar    *aa;
3299   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3300 
3301   PetscFunctionBegin;
3302   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3303     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3304   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3305   bs2  = bs*bs;
3306 
3307   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3308   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3309   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3310   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3311   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3312   M = header[1]; N = header[2]; nz = header[3];
3313 
3314   if (header[3] < 0) {
3315     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3316   }
3317 
3318   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3319 
3320   /*
3321      This code adds extra rows to make sure the number of rows is
3322     divisible by the blocksize
3323   */
3324   mbs        = M/bs;
3325   extra_rows = bs - M + bs*(mbs);
3326   if (extra_rows == bs) extra_rows = 0;
3327   else                  mbs++;
3328   if (extra_rows) {
3329     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3330   }
3331 
3332   /* read in row lengths */
3333   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3334   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3335   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3336 
3337   /* read in column indices */
3338   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3339   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3340   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3341 
3342   /* loop over row lengths determining block row lengths */
3343   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3344   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3345   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3346   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3347   rowcount = 0;
3348   nzcount = 0;
3349   for (i=0; i<mbs; i++) {
3350     nmask = 0;
3351     for (j=0; j<bs; j++) {
3352       kmax = rowlengths[rowcount];
3353       for (k=0; k<kmax; k++) {
3354         tmp = jj[nzcount++]/bs;
3355         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3356       }
3357       rowcount++;
3358     }
3359     browlengths[i] += nmask;
3360     /* zero out the mask elements we set */
3361     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3362   }
3363 
3364   /* create our matrix */
3365   ierr = MatCreate(comm,&B);
3366   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3367   ierr = MatSetType(B,type);CHKERRQ(ierr);
3368   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3369   a = (Mat_SeqBAIJ*)B->data;
3370 
3371   /* set matrix "i" values */
3372   a->i[0] = 0;
3373   for (i=1; i<= mbs; i++) {
3374     a->i[i]      = a->i[i-1] + browlengths[i-1];
3375     a->ilen[i-1] = browlengths[i-1];
3376   }
3377   a->nz         = 0;
3378   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3379 
3380   /* read in nonzero values */
3381   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3382   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3383   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3384 
3385   /* set "a" and "j" values into matrix */
3386   nzcount = 0; jcount = 0;
3387   for (i=0; i<mbs; i++) {
3388     nzcountb = nzcount;
3389     nmask    = 0;
3390     for (j=0; j<bs; j++) {
3391       kmax = rowlengths[i*bs+j];
3392       for (k=0; k<kmax; k++) {
3393         tmp = jj[nzcount++]/bs;
3394 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3395       }
3396     }
3397     /* sort the masked values */
3398     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3399 
3400     /* set "j" values into matrix */
3401     maskcount = 1;
3402     for (j=0; j<nmask; j++) {
3403       a->j[jcount++]  = masked[j];
3404       mask[masked[j]] = maskcount++;
3405     }
3406     /* set "a" values into matrix */
3407     ishift = bs2*a->i[i];
3408     for (j=0; j<bs; j++) {
3409       kmax = rowlengths[i*bs+j];
3410       for (k=0; k<kmax; k++) {
3411         tmp       = jj[nzcountb]/bs ;
3412         block     = mask[tmp] - 1;
3413         point     = jj[nzcountb] - bs*tmp;
3414         idx       = ishift + bs2*block + j + bs*point;
3415         a->a[idx] = (MatScalar)aa[nzcountb++];
3416       }
3417     }
3418     /* zero out the mask elements we set */
3419     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3420   }
3421   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3422 
3423   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3424   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3425   ierr = PetscFree(aa);CHKERRQ(ierr);
3426   ierr = PetscFree(jj);CHKERRQ(ierr);
3427   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3428 
3429   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3430   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3431   ierr = MatView_Private(B);CHKERRQ(ierr);
3432 
3433   *A = B;
3434   PetscFunctionReturn(0);
3435 }
3436 
3437 #undef __FUNCT__
3438 #define __FUNCT__ "MatCreateSeqBAIJ"
3439 /*@C
3440    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3441    compressed row) format.  For good matrix assembly performance the
3442    user should preallocate the matrix storage by setting the parameter nz
3443    (or the array nnz).  By setting these parameters accurately, performance
3444    during matrix assembly can be increased by more than a factor of 50.
3445 
3446    Collective on MPI_Comm
3447 
3448    Input Parameters:
3449 +  comm - MPI communicator, set to PETSC_COMM_SELF
3450 .  bs - size of block
3451 .  m - number of rows
3452 .  n - number of columns
3453 .  nz - number of nonzero blocks  per block row (same for all rows)
3454 -  nnz - array containing the number of nonzero blocks in the various block rows
3455          (possibly different for each block row) or PETSC_NULL
3456 
3457    Output Parameter:
3458 .  A - the matrix
3459 
3460    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3461    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3462    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3463 
3464    Options Database Keys:
3465 .   -mat_no_unroll - uses code that does not unroll the loops in the
3466                      block calculations (much slower)
3467 .    -mat_block_size - size of the blocks to use
3468 
3469    Level: intermediate
3470 
3471    Notes:
3472    The number of rows and columns must be divisible by blocksize.
3473 
3474    If the nnz parameter is given then the nz parameter is ignored
3475 
3476    A nonzero block is any block that as 1 or more nonzeros in it
3477 
3478    The block AIJ format is fully compatible with standard Fortran 77
3479    storage.  That is, the stored row and column indices can begin at
3480    either one (as in Fortran) or zero.  See the users' manual for details.
3481 
3482    Specify the preallocated storage with either nz or nnz (not both).
3483    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3484    allocation.  For additional details, see the users manual chapter on
3485    matrices.
3486 
3487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3488 @*/
3489 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3490 {
3491   PetscErrorCode ierr;
3492 
3493   PetscFunctionBegin;
3494   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3495   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3496   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3497   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3503 /*@C
3504    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3505    per row in the matrix. For good matrix assembly performance the
3506    user should preallocate the matrix storage by setting the parameter nz
3507    (or the array nnz).  By setting these parameters accurately, performance
3508    during matrix assembly can be increased by more than a factor of 50.
3509 
3510    Collective on MPI_Comm
3511 
3512    Input Parameters:
3513 +  A - the matrix
3514 .  bs - size of block
3515 .  nz - number of block nonzeros per block row (same for all rows)
3516 -  nnz - array containing the number of block nonzeros in the various block rows
3517          (possibly different for each block row) or PETSC_NULL
3518 
3519    Options Database Keys:
3520 .   -mat_no_unroll - uses code that does not unroll the loops in the
3521                      block calculations (much slower)
3522 .    -mat_block_size - size of the blocks to use
3523 
3524    Level: intermediate
3525 
3526    Notes:
3527    If the nnz parameter is given then the nz parameter is ignored
3528 
3529    You can call MatGetInfo() to get information on how effective the preallocation was;
3530    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3531    You can also run with the option -info and look for messages with the string
3532    malloc in them to see if additional memory allocation was needed.
3533 
3534    The block AIJ format is fully compatible with standard Fortran 77
3535    storage.  That is, the stored row and column indices can begin at
3536    either one (as in Fortran) or zero.  See the users' manual for details.
3537 
3538    Specify the preallocated storage with either nz or nnz (not both).
3539    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3540    allocation.  For additional details, see the users manual chapter on
3541    matrices.
3542 
3543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3544 @*/
3545 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3546 {
3547   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3548 
3549   PetscFunctionBegin;
3550   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3551   if (f) {
3552     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3553   }
3554   PetscFunctionReturn(0);
3555 }
3556 
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3559 /*@C
3560    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3561    (the default sequential PETSc format).
3562 
3563    Collective on MPI_Comm
3564 
3565    Input Parameters:
3566 +  A - the matrix
3567 .  i - the indices into j for the start of each local row (starts with zero)
3568 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3569 -  v - optional values in the matrix
3570 
3571    Level: developer
3572 
3573 .keywords: matrix, aij, compressed row, sparse
3574 
3575 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3576 @*/
3577 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3578 {
3579   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3580 
3581   PetscFunctionBegin;
3582   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3583   if (f) {
3584     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3585   }
3586   PetscFunctionReturn(0);
3587 }
3588 
3589 
3590 #undef __FUNCT__
3591 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3592 /*@
3593      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3594               (upper triangular entries in CSR format) provided by the user.
3595 
3596      Collective on MPI_Comm
3597 
3598    Input Parameters:
3599 +  comm - must be an MPI communicator of size 1
3600 .  bs - size of block
3601 .  m - number of rows
3602 .  n - number of columns
3603 .  i - row indices
3604 .  j - column indices
3605 -  a - matrix values
3606 
3607    Output Parameter:
3608 .  mat - the matrix
3609 
3610    Level: intermediate
3611 
3612    Notes:
3613        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3614     once the matrix is destroyed
3615 
3616        You cannot set new nonzero locations into this matrix, that will generate an error.
3617 
3618        The i and j indices are 0 based
3619 
3620 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3621 
3622 @*/
3623 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3624 {
3625   PetscErrorCode ierr;
3626   PetscInt       ii;
3627   Mat_SeqBAIJ    *baij;
3628 
3629   PetscFunctionBegin;
3630   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3631   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3632 
3633   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3634   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3635   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3636   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3637   baij = (Mat_SeqBAIJ*)(*mat)->data;
3638   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3639   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3640 
3641   baij->i = i;
3642   baij->j = j;
3643   baij->a = a;
3644   baij->singlemalloc = PETSC_FALSE;
3645   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3646   baij->free_a       = PETSC_FALSE;
3647   baij->free_ij       = PETSC_FALSE;
3648 
3649   for (ii=0; ii<m; ii++) {
3650     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3651 #if defined(PETSC_USE_DEBUG)
3652     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3653 #endif
3654   }
3655 #if defined(PETSC_USE_DEBUG)
3656   for (ii=0; ii<baij->i[m]; ii++) {
3657     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3658     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3659   }
3660 #endif
3661 
3662   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3663   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3664   PetscFunctionReturn(0);
3665 }
3666