xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 81f96dcc196638099f4f8062b7ddb72e8b817c95)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "../src/mat/blockinvert.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr,(*f)(Mat);
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
28   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30 
31   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
32   if (f) {
33     ierr = (*f)(mat);CHKERRQ(ierr);
34   } else {
35     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
36   }
37   PetscFunctionReturn(0);
38 }
39 
40 EXTERN_C_BEGIN
41 #undef __FUNCT__
42 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
43 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
44 {
45   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
46   PetscErrorCode ierr;
47   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
48   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25];
49   PetscReal      shift = 0.0;
50 
51   PetscFunctionBegin;
52   if (a->idiagvalid) PetscFunctionReturn(0);
53   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
54   diag_offset = a->diag;
55   if (!a->idiag) {
56     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
57     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
58   }
59   diag  = a->idiag;
60   mdiag = a->idiag+bs*bs*mbs;
61   /* factor and invert each block */
62   switch (bs){
63     case 1:
64       for (i=0; i<mbs; i++) {
65         odiag = v + 1*diag_offset[i];
66         diag[0]  = odiag[0];
67         mdiag[0] = odiag[0];
68         diag[0]  = 1.0 / (diag[0] + shift);
69         diag    += 1;
70         mdiag   += 1;
71       }
72       break;
73     case 2:
74       for (i=0; i<mbs; i++) {
75         odiag   = v + 4*diag_offset[i];
76         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
78 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
79 	diag    += 4;
80 	mdiag   += 4;
81       }
82       break;
83     case 3:
84       for (i=0; i<mbs; i++) {
85         odiag    = v + 9*diag_offset[i];
86         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
87         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
88         diag[8]  = odiag[8];
89         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
90         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
91         mdiag[8] = odiag[8];
92 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
93 	diag    += 9;
94 	mdiag   += 9;
95       }
96       break;
97     case 4:
98       for (i=0; i<mbs; i++) {
99         odiag  = v + 16*diag_offset[i];
100         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
101         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
103 	diag  += 16;
104 	mdiag += 16;
105       }
106       break;
107     case 5:
108       for (i=0; i<mbs; i++) {
109         odiag = v + 25*diag_offset[i];
110         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
111         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
113 	diag  += 25;
114 	mdiag += 25;
115       }
116       break;
117     case 6:
118       for (i=0; i<mbs; i++) {
119         odiag = v + 36*diag_offset[i];
120         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
121         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
123 	diag  += 36;
124 	mdiag += 36;
125       }
126       break;
127     default:
128       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   PetscFunctionReturn(0);
132 }
133 EXTERN_C_END
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
137 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
138 {
139   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
140   PetscScalar        *x,x1,s1;
141   const PetscScalar  *b;
142   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
143   PetscErrorCode     ierr;
144   PetscInt           m = a->mbs,i,i2,nz,j;
145   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
149   its = its*lits;
150   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
155 
156   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
157 
158   diag  = a->diag;
159   idiag = a->idiag;
160   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
161   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
162 
163   if (flag & SOR_ZERO_INITIAL_GUESS) {
164     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
165       x[0] = b[0]*idiag[0];
166       i2     = 1;
167       idiag += 1;
168       for (i=1; i<m; i++) {
169         v     = aa + ai[i];
170         vi    = aj + ai[i];
171         nz    = diag[i] - ai[i];
172         s1    = b[i2];
173         for (j=0; j<nz; j++) {
174           s1 -= v[j]*x[vi[j]];
175         }
176         x[i2]   = idiag[0]*s1;
177         idiag   += 1;
178         i2      += 1;
179       }
180       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
181       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
182     }
183     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
184         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
185       i2    = 0;
186       mdiag = a->idiag+a->mbs;
187       for (i=0; i<m; i++) {
188         x1      = x[i2];
189         x[i2]   = mdiag[0]*x1;
190         mdiag  += 1;
191         i2     += 1;
192       }
193       ierr = PetscLogFlops(m);CHKERRQ(ierr);
194     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
195       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
196     }
197     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
198       idiag   = a->idiag+a->mbs - 1;
199       i2      = m - 1;
200       x1      = x[i2];
201       x[i2]   = idiag[0]*x1;
202       idiag -= 1;
203       i2    -= 1;
204       for (i=m-2; i>=0; i--) {
205         v     = aa + (diag[i]+1);
206         vi    = aj + diag[i] + 1;
207         nz    = ai[i+1] - diag[i] - 1;
208         s1    = x[i2];
209         for (j=0; j<nz; j++) {
210           s1 -= v[j]*x[vi[j]];
211         }
212         x[i2]   = idiag[0]*s1;
213         idiag   -= 1;
214         i2      -= 1;
215       }
216       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
217     }
218   } else {
219     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
220   }
221   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
222   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
228 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
229 {
230   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
231   PetscScalar        *x,x1,x2,s1,s2;
232   const PetscScalar  *b;
233   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
234   PetscErrorCode     ierr;
235   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
236   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
237 
238   PetscFunctionBegin;
239   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
240   its = its*lits;
241   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
242   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
243   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
244   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
245   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
246 
247   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
248 
249   diag  = a->diag;
250   idiag = a->idiag;
251   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
252   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
253 
254   if (flag & SOR_ZERO_INITIAL_GUESS) {
255     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
256       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
257       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
258       i2     = 2;
259       idiag += 4;
260       for (i=1; i<m; i++) {
261 	v     = aa + 4*ai[i];
262 	vi    = aj + ai[i];
263 	nz    = diag[i] - ai[i];
264 	s1    = b[i2]; s2 = b[i2+1];
265         for (j=0; j<nz; j++) {
266 	  idx  = 2*vi[j];
267           it   = 4*j;
268 	  x1   = x[idx]; x2 = x[1+idx];
269 	  s1  -= v[it]*x1 + v[it+2]*x2;
270 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
271 	}
272 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
273 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
274         idiag   += 4;
275         i2      += 2;
276       }
277       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
278       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
279     }
280     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
281         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
282       i2    = 0;
283       mdiag = a->idiag+4*a->mbs;
284       for (i=0; i<m; i++) {
285         x1      = x[i2]; x2 = x[i2+1];
286         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
287         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
288         mdiag  += 4;
289         i2     += 2;
290       }
291       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
292     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
293       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
294     }
295     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
296       idiag   = a->idiag+4*a->mbs - 4;
297       i2      = 2*m - 2;
298       x1      = x[i2]; x2 = x[i2+1];
299       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
300       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
301       idiag -= 4;
302       i2    -= 2;
303       for (i=m-2; i>=0; i--) {
304 	v     = aa + 4*(diag[i]+1);
305 	vi    = aj + diag[i] + 1;
306 	nz    = ai[i+1] - diag[i] - 1;
307 	s1    = x[i2]; s2 = x[i2+1];
308         for (j=0; j<nz; j++) {
309  	  idx  = 2*vi[j];
310           it   = 4*j;
311 	  x1   = x[idx]; x2 = x[1+idx];
312 	  s1  -= v[it]*x1 + v[it+2]*x2;
313 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
314 	}
315 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
316 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
317         idiag   -= 4;
318         i2      -= 2;
319       }
320       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
321     }
322   } else {
323     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
324   }
325   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
326   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
332 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
333 {
334   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
335   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
336   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
337   const PetscScalar  *b;
338   PetscErrorCode     ierr;
339   PetscInt           m = a->mbs,i,i2,nz,idx;
340   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
341 
342   PetscFunctionBegin;
343   its = its*lits;
344   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
345   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
346   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
347   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
348   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
349   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
350 
351   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
352 
353   diag  = a->diag;
354   idiag = a->idiag;
355   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
356   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
357 
358   if (flag & SOR_ZERO_INITIAL_GUESS) {
359     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
360       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
361       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
362       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
363       i2     = 3;
364       idiag += 9;
365       for (i=1; i<m; i++) {
366         v     = aa + 9*ai[i];
367         vi    = aj + ai[i];
368         nz    = diag[i] - ai[i];
369         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
370         while (nz--) {
371           idx  = 3*(*vi++);
372           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
373           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
374           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
375           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
376           v   += 9;
377         }
378         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
379         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
380         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
381         idiag   += 9;
382         i2      += 3;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
386     }
387     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
388         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
389       i2    = 0;
390       mdiag = a->idiag+9*a->mbs;
391       for (i=0; i<m; i++) {
392         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
393         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
394         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
395         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
396         mdiag  += 9;
397         i2     += 3;
398       }
399       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+9*a->mbs - 9;
405       i2      = 3*m - 3;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
407       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
408       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
409       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
410       idiag -= 9;
411       i2    -= 3;
412       for (i=m-2; i>=0; i--) {
413         v     = aa + 9*(diag[i]+1);
414         vi    = aj + diag[i] + 1;
415         nz    = ai[i+1] - diag[i] - 1;
416         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
417         while (nz--) {
418           idx  = 3*(*vi++);
419           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
420           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
421           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
422           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
423           v   += 9;
424         }
425         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
426         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
427         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
428         idiag   -= 9;
429         i2      -= 3;
430       }
431       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
432     }
433   } else {
434     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
435   }
436   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
437   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
443 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
444 {
445   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
446   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
447   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
448   const PetscScalar  *b;
449   PetscErrorCode     ierr;
450   PetscInt           m = a->mbs,i,i2,nz,idx;
451   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
452 
453   PetscFunctionBegin;
454   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
475       i2     = 4;
476       idiag += 16;
477       for (i=1; i<m; i++) {
478 	v     = aa + 16*ai[i];
479 	vi    = aj + ai[i];
480 	nz    = diag[i] - ai[i];
481 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
482 	while (nz--) {
483 	  idx  = 4*(*vi++);
484 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
485 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
486 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
487 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
488 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
489 	  v   += 16;
490 	}
491 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
492 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
493 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
494 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
495         idiag   += 16;
496         i2      += 4;
497       }
498       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
499       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
500     }
501     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
502         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
503       i2    = 0;
504       mdiag = a->idiag+16*a->mbs;
505       for (i=0; i<m; i++) {
506         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
507         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
508         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
509         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
510         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
511         mdiag  += 16;
512         i2     += 4;
513       }
514       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
515     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
516       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
517     }
518     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
519       idiag   = a->idiag+16*a->mbs - 16;
520       i2      = 4*m - 4;
521       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
522       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
523       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
524       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
525       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
526       idiag -= 16;
527       i2    -= 4;
528       for (i=m-2; i>=0; i--) {
529 	v     = aa + 16*(diag[i]+1);
530 	vi    = aj + diag[i] + 1;
531 	nz    = ai[i+1] - diag[i] - 1;
532 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
533 	while (nz--) {
534 	  idx  = 4*(*vi++);
535 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
536 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
537 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
538 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
539 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
540 	  v   += 16;
541 	}
542 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
543 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
544 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
545 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
546         idiag   -= 16;
547         i2      -= 4;
548       }
549       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
550     }
551   } else {
552     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
553   }
554   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
555   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
561 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
562 {
563   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
564   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
565   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
566   const PetscScalar  *b;
567   PetscErrorCode     ierr;
568   PetscInt           m = a->mbs,i,i2,nz,idx;
569   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
570 
571   PetscFunctionBegin;
572   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
573   its = its*lits;
574   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
575   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
576   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
577   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
578   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
579 
580   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
581 
582   diag  = a->diag;
583   idiag = a->idiag;
584   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
585   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
586 
587   if (flag & SOR_ZERO_INITIAL_GUESS) {
588     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
589       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
590       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
591       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
592       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
593       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
594       i2     = 5;
595       idiag += 25;
596       for (i=1; i<m; i++) {
597 	v     = aa + 25*ai[i];
598 	vi    = aj + ai[i];
599 	nz    = diag[i] - ai[i];
600 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
601 	while (nz--) {
602 	  idx  = 5*(*vi++);
603 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
604 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
605 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
606 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
607 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
608 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
609 	  v   += 25;
610 	}
611 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
612 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
613 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
614 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
615 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
616         idiag   += 25;
617         i2      += 5;
618       }
619       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
620       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
621     }
622     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
623         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
624       i2    = 0;
625       mdiag = a->idiag+25*a->mbs;
626       for (i=0; i<m; i++) {
627         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
628         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
629         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
630         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
631         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
632         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
633         mdiag  += 25;
634         i2     += 5;
635       }
636       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
637     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
638       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
639     }
640     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
641       idiag   = a->idiag+25*a->mbs - 25;
642       i2      = 5*m - 5;
643       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
644       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
645       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
646       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
647       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
648       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
649       idiag -= 25;
650       i2    -= 5;
651       for (i=m-2; i>=0; i--) {
652 	v     = aa + 25*(diag[i]+1);
653 	vi    = aj + diag[i] + 1;
654 	nz    = ai[i+1] - diag[i] - 1;
655 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
656 	while (nz--) {
657 	  idx  = 5*(*vi++);
658 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
659 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
660 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
661 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
662 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
663 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
664 	  v   += 25;
665 	}
666 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
667 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
668 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
669 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
670 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
671         idiag   -= 25;
672         i2      -= 5;
673       }
674       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
675     }
676   } else {
677     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
678   }
679   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
680   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
686 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
687 {
688   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
689   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
690   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
691   const PetscScalar  *b;
692   PetscErrorCode     ierr;
693   PetscInt           m = a->mbs,i,i2,nz,idx;
694   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
695 
696   PetscFunctionBegin;
697   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
698   its = its*lits;
699   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
700   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
701   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
702   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
703   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
704 
705   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
706 
707   diag  = a->diag;
708   idiag = a->idiag;
709   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
710   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
711 
712   if (flag & SOR_ZERO_INITIAL_GUESS) {
713     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
714       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
715       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
716       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
717       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
718       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
719       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
720       i2     = 6;
721       idiag += 36;
722       for (i=1; i<m; i++) {
723         v     = aa + 36*ai[i];
724         vi    = aj + ai[i];
725         nz    = diag[i] - ai[i];
726         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
727         while (nz--) {
728           idx  = 6*(*vi++);
729           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
730           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
731           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
732           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
733           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
734           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
735           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
736           v   += 36;
737         }
738         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
739         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
740         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
741         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
742         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
743         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
744         idiag   += 36;
745         i2      += 6;
746       }
747       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
748       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
749     }
750     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
751         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
752       i2    = 0;
753       mdiag = a->idiag+36*a->mbs;
754       for (i=0; i<m; i++) {
755         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
756         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
757         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
758         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
759         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
760         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
761         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
762         mdiag  += 36;
763         i2     += 6;
764       }
765       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
766     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
767       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
768     }
769     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
770       idiag   = a->idiag+36*a->mbs - 36;
771       i2      = 6*m - 6;
772       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
773       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
774       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
775       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
776       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
777       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
778       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
779       idiag -= 36;
780       i2    -= 6;
781       for (i=m-2; i>=0; i--) {
782         v     = aa + 36*(diag[i]+1);
783         vi    = aj + diag[i] + 1;
784         nz    = ai[i+1] - diag[i] - 1;
785         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
786         while (nz--) {
787           idx  = 6*(*vi++);
788           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
789           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
790           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
791           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
792           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
793           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
794           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
795           v   += 36;
796         }
797         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
798         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
799         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
800         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
801         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
802         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
803         idiag   -= 36;
804         i2      -= 6;
805       }
806       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
807     }
808   } else {
809     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
810   }
811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
812   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
818 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
819 {
820   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
821   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
822   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
823   const PetscScalar  *b;
824   PetscErrorCode     ierr;
825   PetscInt           m = a->mbs,i,i2,nz,idx;
826   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
827 
828   PetscFunctionBegin;
829   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
830   its = its*lits;
831   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
832   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
833   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
834   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
835   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
836 
837   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
838 
839   diag  = a->diag;
840   idiag = a->idiag;
841   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
842   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
843 
844   if (flag & SOR_ZERO_INITIAL_GUESS) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
846       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
847       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
848       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
849       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
850       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
851       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
852       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
853       i2     = 7;
854       idiag += 49;
855       for (i=1; i<m; i++) {
856         v     = aa + 49*ai[i];
857         vi    = aj + ai[i];
858         nz    = diag[i] - ai[i];
859         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
860         while (nz--) {
861           idx  = 7*(*vi++);
862           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
863           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
864           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
865           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
866           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
867           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
868           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
869           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
870           v   += 49;
871         }
872         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
873         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
874         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
875         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
876         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
877         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
878         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
879         idiag   += 49;
880         i2      += 7;
881       }
882       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
883       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
884     }
885     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
886         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
887       i2    = 0;
888       mdiag = a->idiag+49*a->mbs;
889       for (i=0; i<m; i++) {
890         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
891         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
892         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
893         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
894         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
895         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
896         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
897         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
898         mdiag  += 36;
899         i2     += 6;
900       }
901       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
902     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
903       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
904     }
905     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
906       idiag   = a->idiag+49*a->mbs - 49;
907       i2      = 7*m - 7;
908       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
909       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
910       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
911       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
912       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
913       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
914       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
915       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
916       idiag -= 49;
917       i2    -= 7;
918       for (i=m-2; i>=0; i--) {
919         v     = aa + 49*(diag[i]+1);
920         vi    = aj + diag[i] + 1;
921         nz    = ai[i+1] - diag[i] - 1;
922         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
923         while (nz--) {
924           idx  = 7*(*vi++);
925           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
926           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
927           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
928           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
929           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
930           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
931           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
932           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
933           v   += 49;
934         }
935         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
936         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
937         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
938         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
939         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
940         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
941         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
942         idiag   -= 49;
943         i2      -= 7;
944       }
945       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
946     }
947   } else {
948     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
949   }
950   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
951   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 EXTERN_C_BEGIN
965 #undef __FUNCT__
966 #define __FUNCT__ "matsetvaluesblocked4_"
967 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968 {
969   Mat               A = *AA;
970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972   PetscInt          *ai=a->i,*ailen=a->ilen;
973   PetscInt          *aj=a->j,stepval,lastcol = -1;
974   const PetscScalar *value = v;
975   MatScalar         *ap,*aa = a->a,*bap;
976 
977   PetscFunctionBegin;
978   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
979   stepval = (n-1)*4;
980   for (k=0; k<m; k++) { /* loop over added rows */
981     row  = im[k];
982     rp   = aj + ai[row];
983     ap   = aa + 16*ai[row];
984     nrow = ailen[row];
985     low  = 0;
986     high = nrow;
987     for (l=0; l<n; l++) { /* loop over added columns */
988       col = in[l];
989       if (col <= lastcol) low = 0; else high = nrow;
990       lastcol = col;
991       value = v + k*(stepval+4 + l)*4;
992       while (high-low > 7) {
993         t = (low+high)/2;
994         if (rp[t] > col) high = t;
995         else             low  = t;
996       }
997       for (i=low; i<high; i++) {
998         if (rp[i] > col) break;
999         if (rp[i] == col) {
1000           bap  = ap +  16*i;
1001           for (ii=0; ii<4; ii++,value+=stepval) {
1002             for (jj=ii; jj<16; jj+=4) {
1003               bap[jj] += *value++;
1004             }
1005           }
1006           goto noinsert2;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column index thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N >= i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i] = col;
1020       bap   = ap +  16*i;
1021       for (ii=0; ii<4; ii++,value+=stepval) {
1022         for (jj=ii; jj<16; jj+=4) {
1023           bap[jj] = *value++;
1024         }
1025       }
1026       noinsert2:;
1027       low = i;
1028     }
1029     ailen[row] = nrow;
1030   }
1031   PetscFunctionReturnVoid();
1032 }
1033 EXTERN_C_END
1034 
1035 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1036 #define matsetvalues4_ MATSETVALUES4
1037 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1038 #define matsetvalues4_ matsetvalues4
1039 #endif
1040 
1041 EXTERN_C_BEGIN
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatSetValues4_"
1044 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1045 {
1046   Mat         A = *AA;
1047   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1048   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1049   PetscInt    *ai=a->i,*ailen=a->ilen;
1050   PetscInt    *aj=a->j,brow,bcol;
1051   PetscInt    ridx,cidx,lastcol = -1;
1052   MatScalar   *ap,value,*aa=a->a,*bap;
1053 
1054   PetscFunctionBegin;
1055   for (k=0; k<m; k++) { /* loop over added rows */
1056     row  = im[k]; brow = row/4;
1057     rp   = aj + ai[brow];
1058     ap   = aa + 16*ai[brow];
1059     nrow = ailen[brow];
1060     low  = 0;
1061     high = nrow;
1062     for (l=0; l<n; l++) { /* loop over added columns */
1063       col = in[l]; bcol = col/4;
1064       ridx = row % 4; cidx = col % 4;
1065       value = v[l + k*n];
1066       if (col <= lastcol) low = 0; else high = nrow;
1067       lastcol = col;
1068       while (high-low > 7) {
1069         t = (low+high)/2;
1070         if (rp[t] > bcol) high = t;
1071         else              low  = t;
1072       }
1073       for (i=low; i<high; i++) {
1074         if (rp[i] > bcol) break;
1075         if (rp[i] == bcol) {
1076           bap  = ap +  16*i + 4*cidx + ridx;
1077           *bap += value;
1078           goto noinsert1;
1079         }
1080       }
1081       N = nrow++ - 1;
1082       high++; /* added new column thus must search to one higher than before */
1083       /* shift up all the later entries in this row */
1084       for (ii=N; ii>=i; ii--) {
1085         rp[ii+1] = rp[ii];
1086         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1087       }
1088       if (N>=i) {
1089         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1090       }
1091       rp[i]                    = bcol;
1092       ap[16*i + 4*cidx + ridx] = value;
1093       noinsert1:;
1094       low = i;
1095     }
1096     ailen[brow] = nrow;
1097   }
1098   PetscFunctionReturnVoid();
1099 }
1100 EXTERN_C_END
1101 
1102 /*
1103      Checks for missing diagonals
1104 */
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1107 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1108 {
1109   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       *diag,*jj = a->j,i;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1115   *missing = PETSC_FALSE;
1116   if (A->rmap->n > 0 && !jj) {
1117     *missing  = PETSC_TRUE;
1118     if (d) *d = 0;
1119     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1120   } else {
1121     diag     = a->diag;
1122     for (i=0; i<a->mbs; i++) {
1123       if (jj[diag[i]] != i) {
1124         *missing  = PETSC_TRUE;
1125         if (d) *d = i;
1126         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1127       }
1128     }
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1135 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1136 {
1137   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1138   PetscErrorCode ierr;
1139   PetscInt       i,j,m = a->mbs;
1140 
1141   PetscFunctionBegin;
1142   if (!a->diag) {
1143     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1144     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1145     a->free_diag = PETSC_TRUE;
1146   }
1147   for (i=0; i<m; i++) {
1148     a->diag[i] = a->i[i+1];
1149     for (j=a->i[i]; j<a->i[i+1]; j++) {
1150       if (a->j[j] == i) {
1151         a->diag[i] = j;
1152         break;
1153       }
1154     }
1155   }
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 
1160 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1164 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1165 {
1166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1167   PetscErrorCode ierr;
1168   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1169   PetscInt       *tia, *tja;
1170 
1171   PetscFunctionBegin;
1172   *nn = n;
1173   if (!ia) PetscFunctionReturn(0);
1174   if (symmetric) {
1175     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1176     nz   = tia[n];
1177   } else {
1178     tia = a->i; tja = a->j;
1179   }
1180 
1181   if (!blockcompressed && bs > 1) {
1182     (*nn) *= bs;
1183     nbs    = bs;
1184     /* malloc & create the natural set of indices */
1185     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1186     if (n) {
1187       (*ia)[0] = 0;
1188       for (j=1; j<bs; j++) {
1189         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1190       }
1191     }
1192 
1193     for (i=1; i<n; i++) {
1194       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1195       for (j=1; j<bs; j++) {
1196         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1197       }
1198     }
1199     if (n) {
1200       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1201     }
1202 
1203     if (ja) {
1204       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1205       cnt = 0;
1206       for (i=0; i<n; i++) {
1207         for (j=0; j<bs; j++) {
1208           for (k=tia[i]; k<tia[i+1]; k++) {
1209             for (l=0; l<bs; l++) {
1210               (*ja)[cnt++] = bs*tja[k] + l;
1211 	    }
1212           }
1213         }
1214       }
1215     }
1216 
1217     n     *= bs;
1218     nz *= bs*bs;
1219     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1220       ierr = PetscFree(tia);CHKERRQ(ierr);
1221       ierr = PetscFree(tja);CHKERRQ(ierr);
1222     }
1223   } else if (oshift == 1) {
1224     if (symmetric) {
1225       PetscInt nz = tia[A->rmap->n/bs];
1226       /*  add 1 to i and j indices */
1227       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1228       *ia = tia;
1229       if (ja) {
1230 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1231         *ja = tja;
1232       }
1233     } else {
1234       PetscInt nz = a->i[A->rmap->n/bs];
1235       /* malloc space and  add 1 to i and j indices */
1236       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1237       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1238       if (ja) {
1239 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1240 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1241       }
1242     }
1243   } else {
1244     *ia = tia;
1245     if (ja) *ja = tja;
1246   }
1247 
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1254 {
1255   PetscErrorCode ierr;
1256 
1257   PetscFunctionBegin;
1258   if (!ia) PetscFunctionReturn(0);
1259   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1260     ierr = PetscFree(*ia);CHKERRQ(ierr);
1261     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1269 {
1270   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1276 #endif
1277   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1278   if (a->row) {
1279     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1280   }
1281   if (a->col) {
1282     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1283   }
1284   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1285   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1286   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1287   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1288   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1289   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1290   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1291   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1292   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1293 
1294   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1295   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1296   ierr = PetscFree(a);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1313 {
1314   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   switch (op) {
1319   case MAT_ROW_ORIENTED:
1320     a->roworiented    = flg;
1321     break;
1322   case MAT_KEEP_NONZERO_PATTERN:
1323     a->keepnonzeropattern = flg;
1324     break;
1325   case MAT_NEW_NONZERO_LOCATIONS:
1326     a->nonew          = (flg ? 0 : 1);
1327     break;
1328   case MAT_NEW_NONZERO_LOCATION_ERR:
1329     a->nonew          = (flg ? -1 : 0);
1330     break;
1331   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1332     a->nonew          = (flg ? -2 : 0);
1333     break;
1334   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1335     a->nounused       = (flg ? -1 : 0);
1336     break;
1337   case MAT_NEW_DIAGONALS:
1338   case MAT_IGNORE_OFF_PROC_ENTRIES:
1339   case MAT_USE_HASH_TABLE:
1340     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1341     break;
1342   case MAT_SYMMETRIC:
1343   case MAT_STRUCTURALLY_SYMMETRIC:
1344   case MAT_HERMITIAN:
1345   case MAT_SYMMETRY_ETERNAL:
1346     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1347     break;
1348   default:
1349     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1357 {
1358   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1361   MatScalar      *aa,*aa_i;
1362   PetscScalar    *v_i;
1363 
1364   PetscFunctionBegin;
1365   bs  = A->rmap->bs;
1366   ai  = a->i;
1367   aj  = a->j;
1368   aa  = a->a;
1369   bs2 = a->bs2;
1370 
1371   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1372 
1373   bn  = row/bs;   /* Block number */
1374   bp  = row % bs; /* Block Position */
1375   M   = ai[bn+1] - ai[bn];
1376   *nz = bs*M;
1377 
1378   if (v) {
1379     *v = 0;
1380     if (*nz) {
1381       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1382       for (i=0; i<M; i++) { /* for each block in the block row */
1383         v_i  = *v + i*bs;
1384         aa_i = aa + bs2*(ai[bn] + i);
1385         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1386       }
1387     }
1388   }
1389 
1390   if (idx) {
1391     *idx = 0;
1392     if (*nz) {
1393       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1394       for (i=0; i<M; i++) { /* for each block in the block row */
1395         idx_i = *idx + i*bs;
1396         itmp  = bs*aj[ai[bn] + i];
1397         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1398       }
1399     }
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1407 {
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1412   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1421 {
1422   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1423   Mat            C;
1424   PetscErrorCode ierr;
1425   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1426   PetscInt       *rows,*cols,bs2=a->bs2;
1427   MatScalar      *array;
1428 
1429   PetscFunctionBegin;
1430   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1431   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1432     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1433     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1434 
1435     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1436     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1437     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1438     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1439     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1440     ierr = PetscFree(col);CHKERRQ(ierr);
1441   } else {
1442     C = *B;
1443   }
1444 
1445   array = a->a;
1446   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1447   for (i=0; i<mbs; i++) {
1448     cols[0] = i*bs;
1449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1450     len = ai[i+1] - ai[i];
1451     for (j=0; j<len; j++) {
1452       rows[0] = (*aj++)*bs;
1453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1454       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1455       array += bs2;
1456     }
1457   }
1458   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1459 
1460   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462 
1463   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1464     *B = C;
1465   } else {
1466     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1478   int            fd;
1479   PetscScalar    *aa;
1480   FILE           *file;
1481 
1482   PetscFunctionBegin;
1483   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1484   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1485   col_lens[0] = MAT_FILE_COOKIE;
1486 
1487   col_lens[1] = A->rmap->N;
1488   col_lens[2] = A->cmap->n;
1489   col_lens[3] = a->nz*bs2;
1490 
1491   /* store lengths of each row and write (including header) to file */
1492   count = 0;
1493   for (i=0; i<a->mbs; i++) {
1494     for (j=0; j<bs; j++) {
1495       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1496     }
1497   }
1498   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1499   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1500 
1501   /* store column indices (zero start index) */
1502   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1503   count = 0;
1504   for (i=0; i<a->mbs; i++) {
1505     for (j=0; j<bs; j++) {
1506       for (k=a->i[i]; k<a->i[i+1]; k++) {
1507         for (l=0; l<bs; l++) {
1508           jj[count++] = bs*a->j[k] + l;
1509         }
1510       }
1511     }
1512   }
1513   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1514   ierr = PetscFree(jj);CHKERRQ(ierr);
1515 
1516   /* store nonzero values */
1517   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1518   count = 0;
1519   for (i=0; i<a->mbs; i++) {
1520     for (j=0; j<bs; j++) {
1521       for (k=a->i[i]; k<a->i[i+1]; k++) {
1522         for (l=0; l<bs; l++) {
1523           aa[count++] = a->a[bs2*k + l*bs + j];
1524         }
1525       }
1526     }
1527   }
1528   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1529   ierr = PetscFree(aa);CHKERRQ(ierr);
1530 
1531   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1532   if (file) {
1533     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1541 {
1542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1543   PetscErrorCode    ierr;
1544   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1545   PetscViewerFormat format;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1552     Mat aij;
1553     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1555     ierr = MatDestroy(aij);CHKERRQ(ierr);
1556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1557      PetscFunctionReturn(0);
1558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             if (a->a[bs2*k + l*bs + j] != 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1578             }
1579 #endif
1580           }
1581         }
1582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1583       }
1584     }
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1586   } else {
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1588     for (i=0; i<a->mbs; i++) {
1589       for (j=0; j<bs; j++) {
1590         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1591         for (k=a->i[i]; k<a->i[i+1]; k++) {
1592           for (l=0; l<bs; l++) {
1593 #if defined(PETSC_USE_COMPLEX)
1594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1595               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1598               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             } else {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             }
1603 #else
1604             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1612   }
1613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1620 {
1621   Mat            A = (Mat) Aa;
1622   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1623   PetscErrorCode ierr;
1624   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1625   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1626   MatScalar      *aa;
1627   PetscViewer    viewer;
1628 
1629   PetscFunctionBegin;
1630 
1631   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1632   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1633 
1634   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1635 
1636   /* loop over matrix elements drawing boxes */
1637   color = PETSC_DRAW_BLUE;
1638   for (i=0,row=0; i<mbs; i++,row+=bs) {
1639     for (j=a->i[i]; j<a->i[i+1]; j++) {
1640       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642       aa = a->a + j*bs2;
1643       for (k=0; k<bs; k++) {
1644         for (l=0; l<bs; l++) {
1645           if (PetscRealPart(*aa++) >=  0.) continue;
1646           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647         }
1648       }
1649     }
1650   }
1651   color = PETSC_DRAW_CYAN;
1652   for (i=0,row=0; i<mbs; i++,row+=bs) {
1653     for (j=a->i[i]; j<a->i[i+1]; j++) {
1654       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656       aa = a->a + j*bs2;
1657       for (k=0; k<bs; k++) {
1658         for (l=0; l<bs; l++) {
1659           if (PetscRealPart(*aa++) != 0.) continue;
1660           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661         }
1662       }
1663     }
1664   }
1665 
1666   color = PETSC_DRAW_RED;
1667   for (i=0,row=0; i<mbs; i++,row+=bs) {
1668     for (j=a->i[i]; j<a->i[i+1]; j++) {
1669       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671       aa = a->a + j*bs2;
1672       for (k=0; k<bs; k++) {
1673         for (l=0; l<bs; l++) {
1674           if (PetscRealPart(*aa++) <= 0.) continue;
1675           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1676         }
1677       }
1678     }
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1686 {
1687   PetscErrorCode ierr;
1688   PetscReal      xl,yl,xr,yr,w,h;
1689   PetscDraw      draw;
1690   PetscTruth     isnull;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1695   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1696 
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1698   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699   xr += w;    yr += h;  xl = -w;     yl = -h;
1700   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1701   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatView_SeqBAIJ"
1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709 {
1710   PetscErrorCode ierr;
1711   PetscTruth     iascii,isbinary,isdraw;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1717   if (iascii){
1718     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1719   } else if (isbinary) {
1720     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1721   } else if (isdraw) {
1722     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1723   } else {
1724     Mat B;
1725     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1726     ierr = MatView(B,viewer);CHKERRQ(ierr);
1727     ierr = MatDestroy(B);CHKERRQ(ierr);
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1736 {
1737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1738   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1739   PetscInt    *ai = a->i,*ailen = a->ilen;
1740   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1741   MatScalar   *ap,*aa = a->a;
1742 
1743   PetscFunctionBegin;
1744   for (k=0; k<m; k++) { /* loop over rows */
1745     row  = im[k]; brow = row/bs;
1746     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1747     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1748     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1749     nrow = ailen[brow];
1750     for (l=0; l<n; l++) { /* loop over columns */
1751       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1752       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1753       col  = in[l] ;
1754       bcol = col/bs;
1755       cidx = col%bs;
1756       ridx = row%bs;
1757       high = nrow;
1758       low  = 0; /* assume unsorted */
1759       while (high-low > 5) {
1760         t = (low+high)/2;
1761         if (rp[t] > bcol) high = t;
1762         else             low  = t;
1763       }
1764       for (i=low; i<high; i++) {
1765         if (rp[i] > bcol) break;
1766         if (rp[i] == bcol) {
1767           *v++ = ap[bs2*i+bs*cidx+ridx];
1768           goto finished;
1769         }
1770       }
1771       *v++ = 0.0;
1772       finished:;
1773     }
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #define CHUNKSIZE 10
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1782 {
1783   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1784   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1785   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1786   PetscErrorCode    ierr;
1787   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1788   PetscTruth        roworiented=a->roworiented;
1789   const PetscScalar *value = v;
1790   MatScalar         *ap,*aa = a->a,*bap;
1791 
1792   PetscFunctionBegin;
1793   if (roworiented) {
1794     stepval = (n-1)*bs;
1795   } else {
1796     stepval = (m-1)*bs;
1797   }
1798   for (k=0; k<m; k++) { /* loop over added rows */
1799     row  = im[k];
1800     if (row < 0) continue;
1801 #if defined(PETSC_USE_DEBUG)
1802     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1803 #endif
1804     rp   = aj + ai[row];
1805     ap   = aa + bs2*ai[row];
1806     rmax = imax[row];
1807     nrow = ailen[row];
1808     low  = 0;
1809     high = nrow;
1810     for (l=0; l<n; l++) { /* loop over added columns */
1811       if (in[l] < 0) continue;
1812 #if defined(PETSC_USE_DEBUG)
1813       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1814 #endif
1815       col = in[l];
1816       if (roworiented) {
1817         value = v + k*(stepval+bs)*bs + l*bs;
1818       } else {
1819         value = v + l*(stepval+bs)*bs + k*bs;
1820       }
1821       if (col <= lastcol) low = 0; else high = nrow;
1822       lastcol = col;
1823       while (high-low > 7) {
1824         t = (low+high)/2;
1825         if (rp[t] > col) high = t;
1826         else             low  = t;
1827       }
1828       for (i=low; i<high; i++) {
1829         if (rp[i] > col) break;
1830         if (rp[i] == col) {
1831           bap  = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   *bap++ += *value++;
1851                 }
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   *bap++  = *value++;
1857                 }
1858               }
1859             }
1860           }
1861           goto noinsert2;
1862         }
1863       }
1864       if (nonew == 1) goto noinsert2;
1865       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1866       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       for (ii=N; ii>=i; ii--) {
1870         rp[ii+1] = rp[ii];
1871         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       if (N >= i) {
1874         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876       rp[i] = col;
1877       bap   = ap +  bs2*i;
1878       if (roworiented) {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=ii; jj<bs2; jj+=bs) {
1881             bap[jj] = *value++;
1882           }
1883         }
1884       } else {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=0; jj<bs; jj++) {
1887             *bap++  = *value++;
1888           }
1889         }
1890       }
1891       noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax   = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1921       N = ailen[i];
1922       for (j=0; j<N; j++) {
1923         ip[j-fshift] = ip[j];
1924         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1925       }
1926     }
1927     ai[i] = ai[i-1] + ailen[i-1];
1928   }
1929   if (mbs) {
1930     fshift += imax[mbs-1] - ailen[mbs-1];
1931     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1932   }
1933   /* reset ilen and imax for each row */
1934   for (i=0; i<mbs; i++) {
1935     ailen[i] = imax[i] = ai[i+1] - ai[i];
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) {
1947     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   }
1949   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1952   a->reallocs          = 0;
1953   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1954 
1955   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1956   if (a->compressedrow.use){
1957     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   }
1959 
1960   A->same_nonzero = PETSC_TRUE;
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 /*
1965    This function returns an array of flags which indicate the locations of contiguous
1966    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1967    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1968    Assume: sizes should be long enough to hold all the values.
1969 */
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1973 {
1974   PetscInt   i,j,k,row;
1975   PetscTruth flg;
1976 
1977   PetscFunctionBegin;
1978   for (i=0,j=0; i<n; j++) {
1979     row = idx[i];
1980     if (row%bs!=0) { /* Not the begining of a block */
1981       sizes[j] = 1;
1982       i++;
1983     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1984       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1985       i++;
1986     } else { /* Begining of the block, so check if the complete block exists */
1987       flg = PETSC_TRUE;
1988       for (k=1; k<bs; k++) {
1989         if (row+k != idx[i+k]) { /* break in the block */
1990           flg = PETSC_FALSE;
1991           break;
1992         }
1993       }
1994       if (flg) { /* No break in the bs */
1995         sizes[j] = bs;
1996         i+= bs;
1997       } else {
1998         sizes[j] = 1;
1999         i++;
2000       }
2001     }
2002   }
2003   *bs_max = j;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2010 {
2011   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013   PetscInt       i,j,k,count,*rows;
2014   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2015   PetscScalar    zero = 0.0;
2016   MatScalar      *aa;
2017 
2018   PetscFunctionBegin;
2019   /* Make a copy of the IS and  sort it */
2020   /* allocate memory for rows,sizes */
2021   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2022 
2023   /* copy IS values to rows, and sort them */
2024   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2025   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2026   if (baij->keepnonzeropattern) {
2027     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2028     bs_max = is_n;
2029     A->same_nonzero = PETSC_TRUE;
2030   } else {
2031     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2032     A->same_nonzero = PETSC_FALSE;
2033   }
2034 
2035   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2036     row   = rows[j];
2037     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2038     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2039     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2040     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2041       if (diag != 0.0) {
2042         if (baij->ilen[row/bs] > 0) {
2043           baij->ilen[row/bs]       = 1;
2044           baij->j[baij->i[row/bs]] = row/bs;
2045           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2046         }
2047         /* Now insert all the diagonal values for this bs */
2048         for (k=0; k<bs; k++) {
2049           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2050         }
2051       } else { /* (diag == 0.0) */
2052         baij->ilen[row/bs] = 0;
2053       } /* end (diag == 0.0) */
2054     } else { /* (sizes[i] != bs) */
2055 #if defined (PETSC_USE_DEBUG)
2056       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2057 #endif
2058       for (k=0; k<count; k++) {
2059         aa[0] =  zero;
2060         aa    += bs;
2061       }
2062       if (diag != 0.0) {
2063         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2064       }
2065     }
2066   }
2067 
2068   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2075 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2076 {
2077   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2078   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2079   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2080   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2081   PetscErrorCode ierr;
2082   PetscInt       ridx,cidx,bs2=a->bs2;
2083   PetscTruth     roworiented=a->roworiented;
2084   MatScalar      *ap,value,*aa=a->a,*bap;
2085 
2086   PetscFunctionBegin;
2087   if (v) PetscValidScalarPointer(v,6);
2088   for (k=0; k<m; k++) { /* loop over added rows */
2089     row  = im[k];
2090     brow = row/bs;
2091     if (row < 0) continue;
2092 #if defined(PETSC_USE_DEBUG)
2093     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2094 #endif
2095     rp   = aj + ai[brow];
2096     ap   = aa + bs2*ai[brow];
2097     rmax = imax[brow];
2098     nrow = ailen[brow];
2099     low  = 0;
2100     high = nrow;
2101     for (l=0; l<n; l++) { /* loop over added columns */
2102       if (in[l] < 0) continue;
2103 #if defined(PETSC_USE_DEBUG)
2104       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2105 #endif
2106       col = in[l]; bcol = col/bs;
2107       ridx = row % bs; cidx = col % bs;
2108       if (roworiented) {
2109         value = v[l + k*n];
2110       } else {
2111         value = v[k + l*m];
2112       }
2113       if (col <= lastcol) low = 0; else high = nrow;
2114       lastcol = col;
2115       while (high-low > 7) {
2116         t = (low+high)/2;
2117         if (rp[t] > bcol) high = t;
2118         else              low  = t;
2119       }
2120       for (i=low; i<high; i++) {
2121         if (rp[i] > bcol) break;
2122         if (rp[i] == bcol) {
2123           bap  = ap +  bs2*i + bs*cidx + ridx;
2124           if (is == ADD_VALUES) *bap += value;
2125           else                  *bap  = value;
2126           goto noinsert1;
2127         }
2128       }
2129       if (nonew == 1) goto noinsert1;
2130       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2131       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2132       N = nrow++ - 1; high++;
2133       /* shift up all the later entries in this row */
2134       for (ii=N; ii>=i; ii--) {
2135         rp[ii+1] = rp[ii];
2136         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2137       }
2138       if (N>=i) {
2139         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2140       }
2141       rp[i]                      = bcol;
2142       ap[bs2*i + bs*cidx + ridx] = value;
2143       a->nz++;
2144       noinsert1:;
2145       low = i;
2146     }
2147     ailen[brow] = nrow;
2148   }
2149   A->same_nonzero = PETSC_FALSE;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2155 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2156 {
2157   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2158   Mat            outA;
2159   PetscErrorCode ierr;
2160   PetscTruth     row_identity,col_identity;
2161 
2162   PetscFunctionBegin;
2163   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2164   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2165   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2166   if (!row_identity || !col_identity) {
2167     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2168   }
2169 
2170   outA          = inA;
2171   inA->factor   = MAT_FACTOR_LU;
2172 
2173   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2174 
2175   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2176   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2177   a->row = row;
2178   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2179   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2180   a->col = col;
2181 
2182   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2183   if (a->icol) {
2184     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2185   }
2186   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2187   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2188 
2189   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2190   if (!a->solve_work) {
2191     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2192     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2193   }
2194   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2195 
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2203 {
2204   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2205   PetscInt    i,nz,mbs;
2206 
2207   PetscFunctionBegin;
2208   nz  = baij->maxnz/baij->bs2;
2209   mbs = baij->mbs;
2210   for (i=0; i<nz; i++) {
2211     baij->j[i] = indices[i];
2212   }
2213   baij->nz = nz;
2214   for (i=0; i<mbs; i++) {
2215     baij->ilen[i] = baij->imax[i];
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 EXTERN_C_END
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2223 /*@
2224     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2225        in the matrix.
2226 
2227   Input Parameters:
2228 +  mat - the SeqBAIJ matrix
2229 -  indices - the column indices
2230 
2231   Level: advanced
2232 
2233   Notes:
2234     This can be called if you have precomputed the nonzero structure of the
2235   matrix and want to provide it to the matrix object to improve the performance
2236   of the MatSetValues() operation.
2237 
2238     You MUST have set the correct numbers of nonzeros per row in the call to
2239   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2240 
2241     MUST be called before any calls to MatSetValues();
2242 
2243 @*/
2244 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2245 {
2246   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2247 
2248   PetscFunctionBegin;
2249   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2250   PetscValidPointer(indices,2);
2251   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2252   if (f) {
2253     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2254   } else {
2255     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2262 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2263 {
2264   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2265   PetscErrorCode ierr;
2266   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2267   PetscReal      atmp;
2268   PetscScalar    *x,zero = 0.0;
2269   MatScalar      *aa;
2270   PetscInt       ncols,brow,krow,kcol;
2271 
2272   PetscFunctionBegin;
2273   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2274   bs   = A->rmap->bs;
2275   aa   = a->a;
2276   ai   = a->i;
2277   aj   = a->j;
2278   mbs  = a->mbs;
2279 
2280   ierr = VecSet(v,zero);CHKERRQ(ierr);
2281   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2282   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2283   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2284   for (i=0; i<mbs; i++) {
2285     ncols = ai[1] - ai[0]; ai++;
2286     brow  = bs*i;
2287     for (j=0; j<ncols; j++){
2288       for (kcol=0; kcol<bs; kcol++){
2289         for (krow=0; krow<bs; krow++){
2290           atmp = PetscAbsScalar(*aa);aa++;
2291           row = brow + krow;    /* row index */
2292           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2293           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2294         }
2295       }
2296       aj++;
2297     }
2298   }
2299   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatCopy_SeqBAIJ"
2305 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2306 {
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   /* If the two matrices have the same copy implementation, use fast copy. */
2311   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2312     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2313     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2314 
2315     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2316       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2317     }
2318     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2319   } else {
2320     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2321   }
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2327 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2328 {
2329   PetscErrorCode ierr;
2330 
2331   PetscFunctionBegin;
2332   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2338 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2339 {
2340   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2341   PetscFunctionBegin;
2342   *array = a->a;
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2348 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2349 {
2350   PetscFunctionBegin;
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2356 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2357 {
2358   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2359   PetscErrorCode ierr;
2360   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2361   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2362 
2363   PetscFunctionBegin;
2364   if (str == SAME_NONZERO_PATTERN) {
2365     PetscScalar alpha = a;
2366     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2367   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2368     if (y->xtoy && y->XtoY != X) {
2369       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2370       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2371     }
2372     if (!y->xtoy) { /* get xtoy */
2373       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2374       y->XtoY = X;
2375       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2376     }
2377     bs2 = bs*bs;
2378     for (i=0; i<x->nz; i++) {
2379       j = 0;
2380       while (j < bs2){
2381         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2382         j++;
2383       }
2384     }
2385     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2386   } else {
2387     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2388   }
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 #undef __FUNCT__
2393 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2394 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2395 {
2396   PetscInt rbs,cbs;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2401   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2402   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2403   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2409 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2410 {
2411   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2412   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2413   MatScalar      *aa = a->a;
2414 
2415   PetscFunctionBegin;
2416   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2422 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2423 {
2424   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2425   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2426   MatScalar      *aa = a->a;
2427 
2428   PetscFunctionBegin;
2429   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2437 /*
2438     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2439 */
2440 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2441 {
2442   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2443   PetscErrorCode ierr;
2444   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2445   PetscInt       nz = a->i[m],row,*jj,mr,col;
2446 
2447   PetscFunctionBegin;
2448   *nn = n;
2449   if (!ia) PetscFunctionReturn(0);
2450   if (symmetric) {
2451     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2452   } else {
2453     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2454     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2455     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2456     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2457     jj = a->j;
2458     for (i=0; i<nz; i++) {
2459       collengths[jj[i]]++;
2460     }
2461     cia[0] = oshift;
2462     for (i=0; i<n; i++) {
2463       cia[i+1] = cia[i] + collengths[i];
2464     }
2465     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2466     jj   = a->j;
2467     for (row=0; row<m; row++) {
2468       mr = a->i[row+1] - a->i[row];
2469       for (i=0; i<mr; i++) {
2470         col = *jj++;
2471         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2472       }
2473     }
2474     ierr = PetscFree(collengths);CHKERRQ(ierr);
2475     *ia = cia; *ja = cja;
2476   }
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2482 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2483 {
2484   PetscErrorCode ierr;
2485 
2486   PetscFunctionBegin;
2487   if (!ia) PetscFunctionReturn(0);
2488   ierr = PetscFree(*ia);CHKERRQ(ierr);
2489   ierr = PetscFree(*ja);CHKERRQ(ierr);
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 #undef __FUNCT__
2494 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2495 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2496 {
2497   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2498   PetscErrorCode ierr;
2499   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2500   PetscScalar    dx,*y,*xx,*w3_array;
2501   PetscScalar    *vscale_array;
2502   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2503   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2504   void           *fctx = coloring->fctx;
2505   PetscTruth     flg = PETSC_FALSE;
2506   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2507   Vec            x1_tmp;
2508 
2509   PetscFunctionBegin;
2510   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2511   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2512   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2513   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2514 
2515   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2516   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2517   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2518   if (flg) {
2519     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2520   } else {
2521     PetscTruth assembled;
2522     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2523     if (assembled) {
2524       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2525     }
2526   }
2527 
2528   x1_tmp = x1;
2529   if (!coloring->vscale){
2530     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2531   }
2532 
2533   /*
2534     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2535     coloring->F for the coarser grids from the finest
2536   */
2537   if (coloring->F) {
2538     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2539     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2540     if (m1 != m2) {
2541       coloring->F = 0;
2542       }
2543     }
2544 
2545   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2546     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2547   }
2548   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2549 
2550   /* Set w1 = F(x1) */
2551   if (coloring->F) {
2552     w1          = coloring->F; /* use already computed value of function */
2553     coloring->F = 0;
2554   } else {
2555     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2556     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2557     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2558   }
2559 
2560   if (!coloring->w3) {
2561     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2562     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2563   }
2564   w3 = coloring->w3;
2565 
2566     CHKMEMQ;
2567     /* Compute all the local scale factors, including ghost points */
2568   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2569   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2570   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2571   if (ctype == IS_COLORING_GHOSTED){
2572     col_start = 0; col_end = N;
2573   } else if (ctype == IS_COLORING_GLOBAL){
2574     xx = xx - start;
2575     vscale_array = vscale_array - start;
2576     col_start = start; col_end = N + start;
2577   }    CHKMEMQ;
2578   for (col=col_start; col<col_end; col++){
2579     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2580     if (coloring->htype[0] == 'w') {
2581       dx = 1.0 + unorm;
2582     } else {
2583       dx  = xx[col];
2584     }
2585     if (dx == 0.0) dx = 1.0;
2586 #if !defined(PETSC_USE_COMPLEX)
2587     if (dx < umin && dx >= 0.0)      dx = umin;
2588     else if (dx < 0.0 && dx > -umin) dx = -umin;
2589 #else
2590     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2591     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2592 #endif
2593     dx               *= epsilon;
2594     vscale_array[col] = 1.0/dx;
2595   }     CHKMEMQ;
2596   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2597   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2598   if (ctype == IS_COLORING_GLOBAL){
2599     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2600     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2601   }
2602         CHKMEMQ;
2603   if (coloring->vscaleforrow) {
2604     vscaleforrow = coloring->vscaleforrow;
2605   } else {
2606     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2607   }
2608 
2609 
2610   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2611   /*
2612     Loop over each color
2613   */
2614   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2615   for (k=0; k<coloring->ncolors; k++) {
2616     coloring->currentcolor = k;
2617     for (i=0; i<bs; i++) {
2618       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2619       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2620       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2621       /*
2622 	Loop over each column associated with color
2623 	adding the perturbation to the vector w3.
2624       */
2625       for (l=0; l<coloring->ncolumns[k]; l++) {
2626 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2627 	if (coloring->htype[0] == 'w') {
2628 	  dx = 1.0 + unorm;
2629 	} else {
2630 	  dx  = xx[col];
2631 	}
2632 	if (dx == 0.0) dx = 1.0;
2633 #if !defined(PETSC_USE_COMPLEX)
2634 	if (dx < umin && dx >= 0.0)      dx = umin;
2635 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2636 #else
2637 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2638 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2639 #endif
2640 	dx            *= epsilon;
2641 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2642 	w3_array[col] += dx;
2643       }
2644       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2645       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2646 
2647       /*
2648 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2649 	w2 = F(x1 + dx) - F(x1)
2650       */
2651       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2652       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2653       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2654       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2655 
2656       /*
2657 	Loop over rows of vector, putting results into Jacobian matrix
2658       */
2659       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2660       for (l=0; l<coloring->nrows[k]; l++) {
2661 	row    = bs*coloring->rows[k][l];             /* local row index */
2662 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2663         for (j=0; j<bs; j++) {
2664   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2665           srows[j]  = row + start + j;
2666         }
2667 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2668       }
2669       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2670     }
2671   } /* endof for each color */
2672   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2673   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2674   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2675   ierr = PetscFree(srows);CHKERRQ(ierr);
2676 
2677   coloring->currentcolor = -1;
2678   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2681   PetscFunctionReturn(0);
2682 }
2683 
2684 /* -------------------------------------------------------------------*/
2685 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2686        MatGetRow_SeqBAIJ,
2687        MatRestoreRow_SeqBAIJ,
2688        MatMult_SeqBAIJ_N,
2689 /* 4*/ MatMultAdd_SeqBAIJ_N,
2690        MatMultTranspose_SeqBAIJ,
2691        MatMultTransposeAdd_SeqBAIJ,
2692        0,
2693        0,
2694        0,
2695 /*10*/ 0,
2696        MatLUFactor_SeqBAIJ,
2697        0,
2698        0,
2699        MatTranspose_SeqBAIJ,
2700 /*15*/ MatGetInfo_SeqBAIJ,
2701        MatEqual_SeqBAIJ,
2702        MatGetDiagonal_SeqBAIJ,
2703        MatDiagonalScale_SeqBAIJ,
2704        MatNorm_SeqBAIJ,
2705 /*20*/ 0,
2706        MatAssemblyEnd_SeqBAIJ,
2707        MatSetOption_SeqBAIJ,
2708        MatZeroEntries_SeqBAIJ,
2709 /*24*/ MatZeroRows_SeqBAIJ,
2710        0,
2711        0,
2712        0,
2713        0,
2714 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2715        0,
2716        0,
2717        MatGetArray_SeqBAIJ,
2718        MatRestoreArray_SeqBAIJ,
2719 /*34*/ MatDuplicate_SeqBAIJ,
2720        0,
2721        0,
2722        MatILUFactor_SeqBAIJ,
2723        0,
2724 /*39*/ MatAXPY_SeqBAIJ,
2725        MatGetSubMatrices_SeqBAIJ,
2726        MatIncreaseOverlap_SeqBAIJ,
2727        MatGetValues_SeqBAIJ,
2728        MatCopy_SeqBAIJ,
2729 /*44*/ 0,
2730        MatScale_SeqBAIJ,
2731        0,
2732        0,
2733        0,
2734 /*49*/ MatSetBlockSize_SeqBAIJ,
2735        MatGetRowIJ_SeqBAIJ,
2736        MatRestoreRowIJ_SeqBAIJ,
2737        MatGetColumnIJ_SeqBAIJ,
2738        MatRestoreColumnIJ_SeqBAIJ,
2739 /*54*/ MatFDColoringCreate_SeqAIJ,
2740        0,
2741        0,
2742        0,
2743        MatSetValuesBlocked_SeqBAIJ,
2744 /*59*/ MatGetSubMatrix_SeqBAIJ,
2745        MatDestroy_SeqBAIJ,
2746        MatView_SeqBAIJ,
2747        0,
2748        0,
2749 /*64*/ 0,
2750        0,
2751        0,
2752        0,
2753        0,
2754 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2755        0,
2756        MatConvert_Basic,
2757        0,
2758        0,
2759 /*74*/ 0,
2760        MatFDColoringApply_BAIJ,
2761        0,
2762        0,
2763        0,
2764 /*79*/ 0,
2765        0,
2766        0,
2767        0,
2768        MatLoad_SeqBAIJ,
2769 /*84*/ 0,
2770        0,
2771        0,
2772        0,
2773        0,
2774 /*89*/ 0,
2775        0,
2776        0,
2777        0,
2778        0,
2779 /*94*/ 0,
2780        0,
2781        0,
2782        0,
2783        0,
2784 /*99*/0,
2785        0,
2786        0,
2787        0,
2788        0,
2789 /*104*/0,
2790        MatRealPart_SeqBAIJ,
2791        MatImaginaryPart_SeqBAIJ,
2792        0,
2793        0,
2794 /*109*/0,
2795        0,
2796        0,
2797        0,
2798        MatMissingDiagonal_SeqBAIJ,
2799 /*114*/0,
2800        0,
2801        0,
2802        0,
2803        0,
2804 /*119*/0,
2805        0,
2806        MatMultHermitianTranspose_SeqBAIJ,
2807        MatMultHermitianTransposeAdd_SeqBAIJ
2808 };
2809 
2810 EXTERN_C_BEGIN
2811 #undef __FUNCT__
2812 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2813 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2814 {
2815   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2816   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2817   PetscErrorCode ierr;
2818 
2819   PetscFunctionBegin;
2820   if (aij->nonew != 1) {
2821     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2822   }
2823 
2824   /* allocate space for values if not already there */
2825   if (!aij->saved_values) {
2826     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2827     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2828   }
2829 
2830   /* copy values over */
2831   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 EXTERN_C_END
2835 
2836 EXTERN_C_BEGIN
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2839 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2840 {
2841   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2842   PetscErrorCode ierr;
2843   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2844 
2845   PetscFunctionBegin;
2846   if (aij->nonew != 1) {
2847     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2848   }
2849   if (!aij->saved_values) {
2850     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2851   }
2852 
2853   /* copy values over */
2854   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
2857 EXTERN_C_END
2858 
2859 EXTERN_C_BEGIN
2860 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2862 EXTERN_C_END
2863 
2864 EXTERN_C_BEGIN
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2867 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2868 {
2869   Mat_SeqBAIJ    *b;
2870   PetscErrorCode ierr;
2871   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2872   PetscTruth     flg,skipallocation = PETSC_FALSE;
2873 
2874   PetscFunctionBegin;
2875 
2876   if (nz == MAT_SKIP_ALLOCATION) {
2877     skipallocation = PETSC_TRUE;
2878     nz             = 0;
2879   }
2880 
2881   if (bs < 0) {
2882     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2883       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2884     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2885     bs   = PetscAbs(bs);
2886   }
2887   if (nnz && newbs != bs) {
2888     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2889   }
2890   bs   = newbs;
2891 
2892   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2893   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2894   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2895   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2896 
2897   B->preallocated = PETSC_TRUE;
2898 
2899   mbs  = B->rmap->n/bs;
2900   nbs  = B->cmap->n/bs;
2901   bs2  = bs*bs;
2902 
2903   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2904     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2905   }
2906 
2907   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2908   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2909   if (nnz) {
2910     for (i=0; i<mbs; i++) {
2911       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2912       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2913     }
2914   }
2915 
2916   b       = (Mat_SeqBAIJ*)B->data;
2917   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2918     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2919   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2920 
2921   if (!flg) {
2922     switch (bs) {
2923     case 1:
2924       B->ops->mult            = MatMult_SeqBAIJ_1;
2925       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2926       B->ops->sor             = MatSOR_SeqBAIJ_1;
2927       break;
2928     case 2:
2929       B->ops->mult            = MatMult_SeqBAIJ_2;
2930       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2931       B->ops->sor             = MatSOR_SeqBAIJ_2;
2932       break;
2933     case 3:
2934       B->ops->mult            = MatMult_SeqBAIJ_3;
2935       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2936       B->ops->sor             = MatSOR_SeqBAIJ_3;
2937       break;
2938     case 4:
2939       B->ops->mult            = MatMult_SeqBAIJ_4;
2940       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2941       B->ops->sor             = MatSOR_SeqBAIJ_4;
2942       break;
2943     case 5:
2944       B->ops->mult            = MatMult_SeqBAIJ_5;
2945       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2946       B->ops->sor             = MatSOR_SeqBAIJ_5;
2947       break;
2948     case 6:
2949       B->ops->mult            = MatMult_SeqBAIJ_6;
2950       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2951       B->ops->sor             = MatSOR_SeqBAIJ_6;
2952       break;
2953     case 7:
2954       B->ops->mult            = MatMult_SeqBAIJ_7;
2955       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2956       B->ops->sor             = MatSOR_SeqBAIJ_7;
2957       break;
2958     case 15:
2959       B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2960       break;
2961     default:
2962       B->ops->mult            = MatMult_SeqBAIJ_N;
2963       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2964       break;
2965     }
2966   }
2967   B->rmap->bs      = bs;
2968   b->mbs     = mbs;
2969   b->nbs     = nbs;
2970   if (!skipallocation) {
2971     if (!b->imax) {
2972       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2973       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2974       b->free_imax_ilen = PETSC_TRUE;
2975     }
2976     /* b->ilen will count nonzeros in each block row so far. */
2977     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2978     if (!nnz) {
2979       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2980       else if (nz <= 0)        nz = 1;
2981       for (i=0; i<mbs; i++) b->imax[i] = nz;
2982       nz = nz*mbs;
2983     } else {
2984       nz = 0;
2985       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2986     }
2987 
2988     /* allocate the matrix space */
2989     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2990     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2991     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2992     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2993     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2994     b->singlemalloc = PETSC_TRUE;
2995     b->i[0] = 0;
2996     for (i=1; i<mbs+1; i++) {
2997       b->i[i] = b->i[i-1] + b->imax[i-1];
2998     }
2999     b->free_a     = PETSC_TRUE;
3000     b->free_ij    = PETSC_TRUE;
3001   } else {
3002     b->free_a     = PETSC_FALSE;
3003     b->free_ij    = PETSC_FALSE;
3004   }
3005 
3006   B->rmap->bs          = bs;
3007   b->bs2              = bs2;
3008   b->mbs              = mbs;
3009   b->nz               = 0;
3010   b->maxnz            = nz*bs2;
3011   B->info.nz_unneeded = (PetscReal)b->maxnz;
3012   PetscFunctionReturn(0);
3013 }
3014 EXTERN_C_END
3015 
3016 EXTERN_C_BEGIN
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3019 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3020 {
3021   PetscInt       i,m,nz,nz_max=0,*nnz;
3022   PetscScalar    *values=0;
3023   PetscErrorCode ierr;
3024 
3025   PetscFunctionBegin;
3026 
3027   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3028 
3029   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3030   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3031   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3032   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3033   m = B->rmap->n/bs;
3034 
3035   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3036   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3037   for(i=0; i<m; i++) {
3038     nz = ii[i+1]- ii[i];
3039     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3040     nz_max = PetscMax(nz_max, nz);
3041     nnz[i] = nz;
3042   }
3043   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3044   ierr = PetscFree(nnz);CHKERRQ(ierr);
3045 
3046   values = (PetscScalar*)V;
3047   if (!values) {
3048     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3049     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3050   }
3051   for (i=0; i<m; i++) {
3052     PetscInt          ncols  = ii[i+1] - ii[i];
3053     const PetscInt    *icols = jj + ii[i];
3054     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3055     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3056   }
3057   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3058   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3059   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060 
3061   PetscFunctionReturn(0);
3062 }
3063 EXTERN_C_END
3064 
3065 
3066 EXTERN_C_BEGIN
3067 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3068 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3069 EXTERN_C_END
3070 
3071 /*MC
3072    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3073    block sparse compressed row format.
3074 
3075    Options Database Keys:
3076 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3077 
3078   Level: beginner
3079 
3080 .seealso: MatCreateSeqBAIJ()
3081 M*/
3082 
3083 
3084 EXTERN_C_BEGIN
3085 #undef __FUNCT__
3086 #define __FUNCT__ "MatCreate_SeqBAIJ"
3087 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3088 {
3089   PetscErrorCode ierr;
3090   PetscMPIInt    size;
3091   Mat_SeqBAIJ    *b;
3092 
3093   PetscFunctionBegin;
3094   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3095   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3096 
3097   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3098   B->data = (void*)b;
3099   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3100   B->mapping               = 0;
3101   b->row                   = 0;
3102   b->col                   = 0;
3103   b->icol                  = 0;
3104   b->reallocs              = 0;
3105   b->saved_values          = 0;
3106 
3107   b->roworiented           = PETSC_TRUE;
3108   b->nonew                 = 0;
3109   b->diag                  = 0;
3110   b->solve_work            = 0;
3111   b->mult_work             = 0;
3112   B->spptr                 = 0;
3113   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3114   b->keepnonzeropattern    = PETSC_FALSE;
3115   b->xtoy                  = 0;
3116   b->XtoY                  = 0;
3117   b->compressedrow.use     = PETSC_FALSE;
3118   b->compressedrow.nrows   = 0;
3119   b->compressedrow.i       = PETSC_NULL;
3120   b->compressedrow.rindex  = PETSC_NULL;
3121   b->compressedrow.checked = PETSC_FALSE;
3122   B->same_nonzero          = PETSC_FALSE;
3123 
3124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3125                                      "MatGetFactorAvailable_seqbaij_petsc",
3126                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3127   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3128                                      "MatGetFactor_seqbaij_petsc",
3129                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3130   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3131                                      "MatInvertBlockDiagonal_SeqBAIJ",
3132                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3133   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3134                                      "MatStoreValues_SeqBAIJ",
3135                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3136   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3137                                      "MatRetrieveValues_SeqBAIJ",
3138                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3139   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3140                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3141                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3142   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3143                                      "MatConvert_SeqBAIJ_SeqAIJ",
3144                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3145   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3146                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3147                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3148   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3149                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3150                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3151   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3152                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3153                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3154   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3155   PetscFunctionReturn(0);
3156 }
3157 EXTERN_C_END
3158 
3159 #undef __FUNCT__
3160 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3161 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3162 {
3163   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3164   PetscErrorCode ierr;
3165   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3166 
3167   PetscFunctionBegin;
3168   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3169 
3170   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3171     c->imax = a->imax;
3172     c->ilen = a->ilen;
3173     c->free_imax_ilen = PETSC_FALSE;
3174   } else {
3175     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3176     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3177     for (i=0; i<mbs; i++) {
3178       c->imax[i] = a->imax[i];
3179       c->ilen[i] = a->ilen[i];
3180     }
3181     c->free_imax_ilen = PETSC_TRUE;
3182   }
3183 
3184   /* allocate the matrix space */
3185   if (mallocmatspace){
3186     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3187       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3188       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3189       c->singlemalloc = PETSC_FALSE;
3190       c->free_ij      = PETSC_FALSE;
3191       c->i            = a->i;
3192       c->j            = a->j;
3193       c->parent       = A;
3194       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3195       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3196       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3197     } else {
3198       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3199       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3200       c->singlemalloc = PETSC_TRUE;
3201       c->free_ij      = PETSC_TRUE;
3202       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3203       if (mbs > 0) {
3204 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3205 	if (cpvalues == MAT_COPY_VALUES) {
3206 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3207 	} else {
3208 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3209 	}
3210       }
3211     }
3212   }
3213 
3214   c->roworiented = a->roworiented;
3215   c->nonew       = a->nonew;
3216   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3217   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3218   c->bs2         = a->bs2;
3219   c->mbs         = a->mbs;
3220   c->nbs         = a->nbs;
3221 
3222   if (a->diag) {
3223     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3224       c->diag      = a->diag;
3225       c->free_diag = PETSC_FALSE;
3226     } else {
3227       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3228       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3229       for (i=0; i<mbs; i++) {
3230         c->diag[i] = a->diag[i];
3231       }
3232       c->free_diag = PETSC_TRUE;
3233     }
3234   } else c->diag        = 0;
3235   c->nz                 = a->nz;
3236   c->maxnz              = a->maxnz;
3237   c->solve_work         = 0;
3238   c->mult_work          = 0;
3239   c->free_a             = PETSC_TRUE;
3240   c->free_ij            = PETSC_TRUE;
3241   C->preallocated       = PETSC_TRUE;
3242   C->assembled          = PETSC_TRUE;
3243 
3244   c->compressedrow.use     = a->compressedrow.use;
3245   c->compressedrow.nrows   = a->compressedrow.nrows;
3246   c->compressedrow.checked = a->compressedrow.checked;
3247   if (a->compressedrow.checked && a->compressedrow.use){
3248     i = a->compressedrow.nrows;
3249     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3251     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3252     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3253   } else {
3254     c->compressedrow.use    = PETSC_FALSE;
3255     c->compressedrow.i      = PETSC_NULL;
3256     c->compressedrow.rindex = PETSC_NULL;
3257   }
3258   C->same_nonzero = A->same_nonzero;
3259   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3260   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3261   PetscFunctionReturn(0);
3262 }
3263 
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3266 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3267 {
3268     PetscErrorCode ierr;
3269 
3270   PetscFunctionBegin;
3271   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3272   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3273   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3274   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3275   PetscFunctionReturn(0);
3276 }
3277 
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatLoad_SeqBAIJ"
3280 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3281 {
3282   Mat_SeqBAIJ    *a;
3283   Mat            B;
3284   PetscErrorCode ierr;
3285   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3286   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3287   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3288   PetscInt       *masked,nmask,tmp,bs2,ishift;
3289   PetscMPIInt    size;
3290   int            fd;
3291   PetscScalar    *aa;
3292   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3293 
3294   PetscFunctionBegin;
3295   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3296     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3297   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3298   bs2  = bs*bs;
3299 
3300   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3301   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3302   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3303   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3304   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3305   M = header[1]; N = header[2]; nz = header[3];
3306 
3307   if (header[3] < 0) {
3308     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3309   }
3310 
3311   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3312 
3313   /*
3314      This code adds extra rows to make sure the number of rows is
3315     divisible by the blocksize
3316   */
3317   mbs        = M/bs;
3318   extra_rows = bs - M + bs*(mbs);
3319   if (extra_rows == bs) extra_rows = 0;
3320   else                  mbs++;
3321   if (extra_rows) {
3322     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3323   }
3324 
3325   /* read in row lengths */
3326   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3327   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3328   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3329 
3330   /* read in column indices */
3331   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3332   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3333   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3334 
3335   /* loop over row lengths determining block row lengths */
3336   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3337   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3338   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3339   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3340   rowcount = 0;
3341   nzcount = 0;
3342   for (i=0; i<mbs; i++) {
3343     nmask = 0;
3344     for (j=0; j<bs; j++) {
3345       kmax = rowlengths[rowcount];
3346       for (k=0; k<kmax; k++) {
3347         tmp = jj[nzcount++]/bs;
3348         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3349       }
3350       rowcount++;
3351     }
3352     browlengths[i] += nmask;
3353     /* zero out the mask elements we set */
3354     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3355   }
3356 
3357   /* create our matrix */
3358   ierr = MatCreate(comm,&B);
3359   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3360   ierr = MatSetType(B,type);CHKERRQ(ierr);
3361   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3362   a = (Mat_SeqBAIJ*)B->data;
3363 
3364   /* set matrix "i" values */
3365   a->i[0] = 0;
3366   for (i=1; i<= mbs; i++) {
3367     a->i[i]      = a->i[i-1] + browlengths[i-1];
3368     a->ilen[i-1] = browlengths[i-1];
3369   }
3370   a->nz         = 0;
3371   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3372 
3373   /* read in nonzero values */
3374   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3375   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3376   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3377 
3378   /* set "a" and "j" values into matrix */
3379   nzcount = 0; jcount = 0;
3380   for (i=0; i<mbs; i++) {
3381     nzcountb = nzcount;
3382     nmask    = 0;
3383     for (j=0; j<bs; j++) {
3384       kmax = rowlengths[i*bs+j];
3385       for (k=0; k<kmax; k++) {
3386         tmp = jj[nzcount++]/bs;
3387 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3388       }
3389     }
3390     /* sort the masked values */
3391     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3392 
3393     /* set "j" values into matrix */
3394     maskcount = 1;
3395     for (j=0; j<nmask; j++) {
3396       a->j[jcount++]  = masked[j];
3397       mask[masked[j]] = maskcount++;
3398     }
3399     /* set "a" values into matrix */
3400     ishift = bs2*a->i[i];
3401     for (j=0; j<bs; j++) {
3402       kmax = rowlengths[i*bs+j];
3403       for (k=0; k<kmax; k++) {
3404         tmp       = jj[nzcountb]/bs ;
3405         block     = mask[tmp] - 1;
3406         point     = jj[nzcountb] - bs*tmp;
3407         idx       = ishift + bs2*block + j + bs*point;
3408         a->a[idx] = (MatScalar)aa[nzcountb++];
3409       }
3410     }
3411     /* zero out the mask elements we set */
3412     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3413   }
3414   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3415 
3416   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3417   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3418   ierr = PetscFree(aa);CHKERRQ(ierr);
3419   ierr = PetscFree(jj);CHKERRQ(ierr);
3420   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3421 
3422   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3423   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3424   ierr = MatView_Private(B);CHKERRQ(ierr);
3425 
3426   *A = B;
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 #undef __FUNCT__
3431 #define __FUNCT__ "MatCreateSeqBAIJ"
3432 /*@C
3433    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3434    compressed row) format.  For good matrix assembly performance the
3435    user should preallocate the matrix storage by setting the parameter nz
3436    (or the array nnz).  By setting these parameters accurately, performance
3437    during matrix assembly can be increased by more than a factor of 50.
3438 
3439    Collective on MPI_Comm
3440 
3441    Input Parameters:
3442 +  comm - MPI communicator, set to PETSC_COMM_SELF
3443 .  bs - size of block
3444 .  m - number of rows
3445 .  n - number of columns
3446 .  nz - number of nonzero blocks  per block row (same for all rows)
3447 -  nnz - array containing the number of nonzero blocks in the various block rows
3448          (possibly different for each block row) or PETSC_NULL
3449 
3450    Output Parameter:
3451 .  A - the matrix
3452 
3453    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3454    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3455    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3456 
3457    Options Database Keys:
3458 .   -mat_no_unroll - uses code that does not unroll the loops in the
3459                      block calculations (much slower)
3460 .    -mat_block_size - size of the blocks to use
3461 
3462    Level: intermediate
3463 
3464    Notes:
3465    The number of rows and columns must be divisible by blocksize.
3466 
3467    If the nnz parameter is given then the nz parameter is ignored
3468 
3469    A nonzero block is any block that as 1 or more nonzeros in it
3470 
3471    The block AIJ format is fully compatible with standard Fortran 77
3472    storage.  That is, the stored row and column indices can begin at
3473    either one (as in Fortran) or zero.  See the users' manual for details.
3474 
3475    Specify the preallocated storage with either nz or nnz (not both).
3476    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3477    allocation.  For additional details, see the users manual chapter on
3478    matrices.
3479 
3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3481 @*/
3482 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3483 {
3484   PetscErrorCode ierr;
3485 
3486   PetscFunctionBegin;
3487   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3488   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3489   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3490   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3496 /*@C
3497    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3498    per row in the matrix. For good matrix assembly performance the
3499    user should preallocate the matrix storage by setting the parameter nz
3500    (or the array nnz).  By setting these parameters accurately, performance
3501    during matrix assembly can be increased by more than a factor of 50.
3502 
3503    Collective on MPI_Comm
3504 
3505    Input Parameters:
3506 +  A - the matrix
3507 .  bs - size of block
3508 .  nz - number of block nonzeros per block row (same for all rows)
3509 -  nnz - array containing the number of block nonzeros in the various block rows
3510          (possibly different for each block row) or PETSC_NULL
3511 
3512    Options Database Keys:
3513 .   -mat_no_unroll - uses code that does not unroll the loops in the
3514                      block calculations (much slower)
3515 .    -mat_block_size - size of the blocks to use
3516 
3517    Level: intermediate
3518 
3519    Notes:
3520    If the nnz parameter is given then the nz parameter is ignored
3521 
3522    You can call MatGetInfo() to get information on how effective the preallocation was;
3523    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3524    You can also run with the option -info and look for messages with the string
3525    malloc in them to see if additional memory allocation was needed.
3526 
3527    The block AIJ format is fully compatible with standard Fortran 77
3528    storage.  That is, the stored row and column indices can begin at
3529    either one (as in Fortran) or zero.  See the users' manual for details.
3530 
3531    Specify the preallocated storage with either nz or nnz (not both).
3532    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3533    allocation.  For additional details, see the users manual chapter on
3534    matrices.
3535 
3536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3537 @*/
3538 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3539 {
3540   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3541 
3542   PetscFunctionBegin;
3543   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3544   if (f) {
3545     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3546   }
3547   PetscFunctionReturn(0);
3548 }
3549 
3550 #undef __FUNCT__
3551 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3552 /*@C
3553    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3554    (the default sequential PETSc format).
3555 
3556    Collective on MPI_Comm
3557 
3558    Input Parameters:
3559 +  A - the matrix
3560 .  i - the indices into j for the start of each local row (starts with zero)
3561 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3562 -  v - optional values in the matrix
3563 
3564    Level: developer
3565 
3566 .keywords: matrix, aij, compressed row, sparse
3567 
3568 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3569 @*/
3570 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3571 {
3572   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3573 
3574   PetscFunctionBegin;
3575   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3576   if (f) {
3577     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3578   }
3579   PetscFunctionReturn(0);
3580 }
3581 
3582 
3583 #undef __FUNCT__
3584 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3585 /*@
3586      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3587               (upper triangular entries in CSR format) provided by the user.
3588 
3589      Collective on MPI_Comm
3590 
3591    Input Parameters:
3592 +  comm - must be an MPI communicator of size 1
3593 .  bs - size of block
3594 .  m - number of rows
3595 .  n - number of columns
3596 .  i - row indices
3597 .  j - column indices
3598 -  a - matrix values
3599 
3600    Output Parameter:
3601 .  mat - the matrix
3602 
3603    Level: intermediate
3604 
3605    Notes:
3606        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3607     once the matrix is destroyed
3608 
3609        You cannot set new nonzero locations into this matrix, that will generate an error.
3610 
3611        The i and j indices are 0 based
3612 
3613 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3614 
3615 @*/
3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3617 {
3618   PetscErrorCode ierr;
3619   PetscInt       ii;
3620   Mat_SeqBAIJ    *baij;
3621 
3622   PetscFunctionBegin;
3623   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3624   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3625 
3626   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3627   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3628   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3629   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3630   baij = (Mat_SeqBAIJ*)(*mat)->data;
3631   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3632   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3633 
3634   baij->i = i;
3635   baij->j = j;
3636   baij->a = a;
3637   baij->singlemalloc = PETSC_FALSE;
3638   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3639   baij->free_a       = PETSC_FALSE;
3640   baij->free_ij       = PETSC_FALSE;
3641 
3642   for (ii=0; ii<m; ii++) {
3643     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3644 #if defined(PETSC_USE_DEBUG)
3645     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3646 #endif
3647   }
3648 #if defined(PETSC_USE_DEBUG)
3649   for (ii=0; ii<baij->i[m]; ii++) {
3650     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3651     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3652   }
3653 #endif
3654 
3655   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3656   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   PetscFunctionReturn(0);
3658 }
3659