xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4fd072dba270d35b213751abb588af9c325ffdf1)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,idx;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         while (nz--) {
175           idx  = (*vi++);
176           x1   = x[idx];
177           s1  -= v[0]*x1;
178           v   += 1;
179         }
180         x[i2]   = idiag[0]*s1;
181         idiag   += 1;
182         i2      += 1;
183       }
184       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
185       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
186     }
187     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
188         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
189       i2    = 0;
190       mdiag = a->idiag+a->mbs;
191       for (i=0; i<m; i++) {
192         x1      = x[i2];
193         x[i2]   = mdiag[0]*x1;
194         mdiag  += 1;
195         i2     += 1;
196       }
197       ierr = PetscLogFlops(m);CHKERRQ(ierr);
198     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
199       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
200     }
201     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
202       idiag   = a->idiag+a->mbs - 1;
203       i2      = m - 1;
204       x1      = x[i2];
205       x[i2]   = idiag[0]*x1;
206       idiag -= 1;
207       i2    -= 1;
208       for (i=m-2; i>=0; i--) {
209         v     = aa + (diag[i]+1);
210         vi    = aj + diag[i] + 1;
211         nz    = ai[i+1] - diag[i] - 1;
212         s1    = x[i2];
213         while (nz--) {
214           idx  = (*vi++);
215           x1   = x[idx];
216           s1  -= v[0]*x1;
217           v   += 1;
218         }
219         x[i2]   = idiag[0]*s1;
220         idiag   -= 1;
221         i2      -= 1;
222       }
223       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
224     }
225   } else {
226     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
227   }
228   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
229   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
236 {
237   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar        *x,x1,x2,s1,s2;
239   const PetscScalar  *b;
240   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
241   PetscErrorCode     ierr;
242   PetscInt           m = a->mbs,i,i2,nz,idx;
243   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
244 
245   PetscFunctionBegin;
246   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
247   its = its*lits;
248   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
249   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
250   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
251   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
252   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
253 
254   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
255 
256   diag  = a->diag;
257   idiag = a->idiag;
258   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
259   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
260 
261   if (flag & SOR_ZERO_INITIAL_GUESS) {
262     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
263       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
264       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
265       i2     = 2;
266       idiag += 4;
267       for (i=1; i<m; i++) {
268 	v     = aa + 4*ai[i];
269 	vi    = aj + ai[i];
270 	nz    = diag[i] - ai[i];
271 	s1    = b[i2]; s2 = b[i2+1];
272 	while (nz--) {
273 	  idx  = 2*(*vi++);
274 	  x1   = x[idx]; x2 = x[1+idx];
275 	  s1  -= v[0]*x1 + v[2]*x2;
276 	  s2  -= v[1]*x1 + v[3]*x2;
277 	  v   += 4;
278 	}
279 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
280 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
281         idiag   += 4;
282         i2      += 2;
283       }
284       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
285       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
286     }
287     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
288         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
289       i2    = 0;
290       mdiag = a->idiag+4*a->mbs;
291       for (i=0; i<m; i++) {
292         x1      = x[i2]; x2 = x[i2+1];
293         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
294         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
295         mdiag  += 4;
296         i2     += 2;
297       }
298       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
299     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
300       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
301     }
302     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
303       idiag   = a->idiag+4*a->mbs - 4;
304       i2      = 2*m - 2;
305       x1      = x[i2]; x2 = x[i2+1];
306       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
307       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
308       idiag -= 4;
309       i2    -= 2;
310       for (i=m-2; i>=0; i--) {
311 	v     = aa + 4*(diag[i]+1);
312 	vi    = aj + diag[i] + 1;
313 	nz    = ai[i+1] - diag[i] - 1;
314 	s1    = x[i2]; s2 = x[i2+1];
315 	while (nz--) {
316 	  idx  = 2*(*vi++);
317 	  x1   = x[idx]; x2 = x[1+idx];
318 	  s1  -= v[0]*x1 + v[2]*x2;
319 	  s2  -= v[1]*x1 + v[3]*x2;
320 	  v   += 4;
321 	}
322 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
323 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
324         idiag   -= 4;
325         i2      -= 2;
326       }
327       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
328     }
329   } else {
330     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
331   }
332   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
333   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
340 {
341   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
342   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
343   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
344   const PetscScalar  *b;
345   PetscErrorCode     ierr;
346   PetscInt           m = a->mbs,i,i2,nz,idx;
347   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
348 
349   PetscFunctionBegin;
350   its = its*lits;
351   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
352   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
353   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
354   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
355   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
356   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
357 
358   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
359 
360   diag  = a->diag;
361   idiag = a->idiag;
362   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
363   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
364 
365   if (flag & SOR_ZERO_INITIAL_GUESS) {
366     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
367       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
368       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
369       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
370       i2     = 3;
371       idiag += 9;
372       for (i=1; i<m; i++) {
373         v     = aa + 9*ai[i];
374         vi    = aj + ai[i];
375         nz    = diag[i] - ai[i];
376         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
377         while (nz--) {
378           idx  = 3*(*vi++);
379           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
380           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
381           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
382           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
383           v   += 9;
384         }
385         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
386         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
387         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
388         idiag   += 9;
389         i2      += 3;
390       }
391       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
392       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
393     }
394     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
395         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
396       i2    = 0;
397       mdiag = a->idiag+9*a->mbs;
398       for (i=0; i<m; i++) {
399         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
400         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
401         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
402         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
403         mdiag  += 9;
404         i2     += 3;
405       }
406       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
407     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
408       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
409     }
410     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
411       idiag   = a->idiag+9*a->mbs - 9;
412       i2      = 3*m - 3;
413       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
414       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
415       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
416       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
417       idiag -= 9;
418       i2    -= 3;
419       for (i=m-2; i>=0; i--) {
420         v     = aa + 9*(diag[i]+1);
421         vi    = aj + diag[i] + 1;
422         nz    = ai[i+1] - diag[i] - 1;
423         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
424         while (nz--) {
425           idx  = 3*(*vi++);
426           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
427           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
428           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
429           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
430           v   += 9;
431         }
432         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
433         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
434         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
435         idiag   -= 9;
436         i2      -= 3;
437       }
438       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
439     }
440   } else {
441     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
442   }
443   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
451 {
452   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
453   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
454   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
455   const PetscScalar  *b;
456   PetscErrorCode     ierr;
457   PetscInt           m = a->mbs,i,i2,nz,idx;
458   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
459 
460   PetscFunctionBegin;
461   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
462   its = its*lits;
463   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
464   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
465   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
466   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
467   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
468 
469   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
470 
471   diag  = a->diag;
472   idiag = a->idiag;
473   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
474   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
475 
476   if (flag & SOR_ZERO_INITIAL_GUESS) {
477     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
478       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
479       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
480       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
481       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
482       i2     = 4;
483       idiag += 16;
484       for (i=1; i<m; i++) {
485 	v     = aa + 16*ai[i];
486 	vi    = aj + ai[i];
487 	nz    = diag[i] - ai[i];
488 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
489 	while (nz--) {
490 	  idx  = 4*(*vi++);
491 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
492 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
493 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
494 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
495 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
496 	  v   += 16;
497 	}
498 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
499 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
500 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
501 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
502         idiag   += 16;
503         i2      += 4;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+16*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
515         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
516         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
517         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
518         mdiag  += 16;
519         i2     += 4;
520       }
521       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
522     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
523       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
524     }
525     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
526       idiag   = a->idiag+16*a->mbs - 16;
527       i2      = 4*m - 4;
528       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
529       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
530       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
531       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
532       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
533       idiag -= 16;
534       i2    -= 4;
535       for (i=m-2; i>=0; i--) {
536 	v     = aa + 16*(diag[i]+1);
537 	vi    = aj + diag[i] + 1;
538 	nz    = ai[i+1] - diag[i] - 1;
539 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
540 	while (nz--) {
541 	  idx  = 4*(*vi++);
542 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
543 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
544 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
545 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
546 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
547 	  v   += 16;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
550 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
551 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
552 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
553         idiag   -= 16;
554         i2      -= 4;
555       }
556       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
569 {
570   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
571   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
572   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
573   const PetscScalar  *b;
574   PetscErrorCode     ierr;
575   PetscInt           m = a->mbs,i,i2,nz,idx;
576   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
577 
578   PetscFunctionBegin;
579   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
580   its = its*lits;
581   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
582   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
583   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
584   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
585   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
586 
587   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
588 
589   diag  = a->diag;
590   idiag = a->idiag;
591   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
592   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
593 
594   if (flag & SOR_ZERO_INITIAL_GUESS) {
595     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
596       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
597       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
598       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
599       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
600       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
601       i2     = 5;
602       idiag += 25;
603       for (i=1; i<m; i++) {
604 	v     = aa + 25*ai[i];
605 	vi    = aj + ai[i];
606 	nz    = diag[i] - ai[i];
607 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
608 	while (nz--) {
609 	  idx  = 5*(*vi++);
610 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
611 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
612 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
613 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
614 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
615 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
616 	  v   += 25;
617 	}
618 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
619 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
620 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
621 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
622 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
623         idiag   += 25;
624         i2      += 5;
625       }
626       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
627       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
628     }
629     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
630         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
631       i2    = 0;
632       mdiag = a->idiag+25*a->mbs;
633       for (i=0; i<m; i++) {
634         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
635         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
636         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
637         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
638         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
639         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
640         mdiag  += 25;
641         i2     += 5;
642       }
643       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
644     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
645       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
646     }
647     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
648       idiag   = a->idiag+25*a->mbs - 25;
649       i2      = 5*m - 5;
650       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
651       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
652       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
653       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
654       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
655       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
656       idiag -= 25;
657       i2    -= 5;
658       for (i=m-2; i>=0; i--) {
659 	v     = aa + 25*(diag[i]+1);
660 	vi    = aj + diag[i] + 1;
661 	nz    = ai[i+1] - diag[i] - 1;
662 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
663 	while (nz--) {
664 	  idx  = 5*(*vi++);
665 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
666 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
667 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
668 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
669 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
670 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
671 	  v   += 25;
672 	}
673 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
674 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
675 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
676 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
677 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
678         idiag   -= 25;
679         i2      -= 5;
680       }
681       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
682     }
683   } else {
684     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
685   }
686   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
694 {
695   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
696   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
697   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
698   const PetscScalar  *b;
699   PetscErrorCode     ierr;
700   PetscInt           m = a->mbs,i,i2,nz,idx;
701   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
702 
703   PetscFunctionBegin;
704   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
705   its = its*lits;
706   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
707   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
708   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
709   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
710   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
711 
712   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
713 
714   diag  = a->diag;
715   idiag = a->idiag;
716   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
717   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
718 
719   if (flag & SOR_ZERO_INITIAL_GUESS) {
720     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
721       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
722       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
723       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
724       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
725       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
726       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
727       i2     = 6;
728       idiag += 36;
729       for (i=1; i<m; i++) {
730         v     = aa + 36*ai[i];
731         vi    = aj + ai[i];
732         nz    = diag[i] - ai[i];
733         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
734         while (nz--) {
735           idx  = 6*(*vi++);
736           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
737           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
738           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
739           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
740           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
741           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
742           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
743           v   += 36;
744         }
745         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
746         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
747         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
748         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
749         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
750         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
751         idiag   += 36;
752         i2      += 6;
753       }
754       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
755       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
756     }
757     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
758         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
759       i2    = 0;
760       mdiag = a->idiag+36*a->mbs;
761       for (i=0; i<m; i++) {
762         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
763         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
764         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
765         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
766         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
767         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
768         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
769         mdiag  += 36;
770         i2     += 6;
771       }
772       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
773     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
774       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
775     }
776     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
777       idiag   = a->idiag+36*a->mbs - 36;
778       i2      = 6*m - 6;
779       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
780       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
781       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
782       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
783       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
784       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
785       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
786       idiag -= 36;
787       i2    -= 6;
788       for (i=m-2; i>=0; i--) {
789         v     = aa + 36*(diag[i]+1);
790         vi    = aj + diag[i] + 1;
791         nz    = ai[i+1] - diag[i] - 1;
792         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
793         while (nz--) {
794           idx  = 6*(*vi++);
795           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
796           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
797           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
798           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
799           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
800           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
801           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
802           v   += 36;
803         }
804         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
805         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
806         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
807         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
808         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
809         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
810         idiag   -= 36;
811         i2      -= 6;
812       }
813       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
814     }
815   } else {
816     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
817   }
818   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
819   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
826 {
827   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
828   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
829   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
830   const PetscScalar  *b;
831   PetscErrorCode     ierr;
832   PetscInt           m = a->mbs,i,i2,nz,idx;
833   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
834 
835   PetscFunctionBegin;
836   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
837   its = its*lits;
838   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
839   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
840   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
841   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
842   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
843 
844   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
845 
846   diag  = a->diag;
847   idiag = a->idiag;
848   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
849   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
850 
851   if (flag & SOR_ZERO_INITIAL_GUESS) {
852     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
853       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
854       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
855       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
856       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
857       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
858       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
859       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
860       i2     = 7;
861       idiag += 49;
862       for (i=1; i<m; i++) {
863         v     = aa + 49*ai[i];
864         vi    = aj + ai[i];
865         nz    = diag[i] - ai[i];
866         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
867         while (nz--) {
868           idx  = 7*(*vi++);
869           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
870           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
871           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
872           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
873           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
874           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
875           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
876           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
877           v   += 49;
878         }
879         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
880         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
881         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
882         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
883         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
884         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
885         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
886         idiag   += 49;
887         i2      += 7;
888       }
889       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
890       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
891     }
892     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
893         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
894       i2    = 0;
895       mdiag = a->idiag+49*a->mbs;
896       for (i=0; i<m; i++) {
897         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
898         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
899         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
900         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
901         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
902         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
903         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
904         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
905         mdiag  += 36;
906         i2     += 6;
907       }
908       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
909     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
910       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
911     }
912     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
913       idiag   = a->idiag+49*a->mbs - 49;
914       i2      = 7*m - 7;
915       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
916       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
917       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
918       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
919       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
920       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
921       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
922       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
923       idiag -= 49;
924       i2    -= 7;
925       for (i=m-2; i>=0; i--) {
926         v     = aa + 49*(diag[i]+1);
927         vi    = aj + diag[i] + 1;
928         nz    = ai[i+1] - diag[i] - 1;
929         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
930         while (nz--) {
931           idx  = 7*(*vi++);
932           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
933           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
934           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
935           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
936           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
937           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
938           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
939           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
940           v   += 49;
941         }
942         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
943         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
944         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
945         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
946         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
947         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
948         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
949         idiag   -= 49;
950         i2      -= 7;
951       }
952       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
953     }
954   } else {
955     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
956   }
957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /*
963     Special version for direct calls from Fortran (Used in PETSc-fun3d)
964 */
965 #if defined(PETSC_HAVE_FORTRAN_CAPS)
966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
968 #define matsetvaluesblocked4_ matsetvaluesblocked4
969 #endif
970 
971 EXTERN_C_BEGIN
972 #undef __FUNCT__
973 #define __FUNCT__ "matsetvaluesblocked4_"
974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
975 {
976   Mat               A = *AA;
977   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
979   PetscInt          *ai=a->i,*ailen=a->ilen;
980   PetscInt          *aj=a->j,stepval,lastcol = -1;
981   const PetscScalar *value = v;
982   MatScalar         *ap,*aa = a->a,*bap;
983 
984   PetscFunctionBegin;
985   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
986   stepval = (n-1)*4;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k];
989     rp   = aj + ai[row];
990     ap   = aa + 16*ai[row];
991     nrow = ailen[row];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col = in[l];
996       if (col <= lastcol) low = 0; else high = nrow;
997       lastcol = col;
998       value = v + k*(stepval+4 + l)*4;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > col) high = t;
1002         else             low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > col) break;
1006         if (rp[i] == col) {
1007           bap  = ap +  16*i;
1008           for (ii=0; ii<4; ii++,value+=stepval) {
1009             for (jj=ii; jj<16; jj+=4) {
1010               bap[jj] += *value++;
1011             }
1012           }
1013           goto noinsert2;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column index thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       for (ii=N; ii>=i; ii--) {
1020         rp[ii+1] = rp[ii];
1021         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1022       }
1023       if (N >= i) {
1024         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1025       }
1026       rp[i] = col;
1027       bap   = ap +  16*i;
1028       for (ii=0; ii<4; ii++,value+=stepval) {
1029         for (jj=ii; jj<16; jj+=4) {
1030           bap[jj] = *value++;
1031         }
1032       }
1033       noinsert2:;
1034       low = i;
1035     }
1036     ailen[row] = nrow;
1037   }
1038   PetscFunctionReturnVoid();
1039 }
1040 EXTERN_C_END
1041 
1042 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1043 #define matsetvalues4_ MATSETVALUES4
1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1045 #define matsetvalues4_ matsetvalues4
1046 #endif
1047 
1048 EXTERN_C_BEGIN
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatSetValues4_"
1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1052 {
1053   Mat         A = *AA;
1054   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1055   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1056   PetscInt    *ai=a->i,*ailen=a->ilen;
1057   PetscInt    *aj=a->j,brow,bcol;
1058   PetscInt    ridx,cidx,lastcol = -1;
1059   MatScalar   *ap,value,*aa=a->a,*bap;
1060 
1061   PetscFunctionBegin;
1062   for (k=0; k<m; k++) { /* loop over added rows */
1063     row  = im[k]; brow = row/4;
1064     rp   = aj + ai[brow];
1065     ap   = aa + 16*ai[brow];
1066     nrow = ailen[brow];
1067     low  = 0;
1068     high = nrow;
1069     for (l=0; l<n; l++) { /* loop over added columns */
1070       col = in[l]; bcol = col/4;
1071       ridx = row % 4; cidx = col % 4;
1072       value = v[l + k*n];
1073       if (col <= lastcol) low = 0; else high = nrow;
1074       lastcol = col;
1075       while (high-low > 7) {
1076         t = (low+high)/2;
1077         if (rp[t] > bcol) high = t;
1078         else              low  = t;
1079       }
1080       for (i=low; i<high; i++) {
1081         if (rp[i] > bcol) break;
1082         if (rp[i] == bcol) {
1083           bap  = ap +  16*i + 4*cidx + ridx;
1084           *bap += value;
1085           goto noinsert1;
1086         }
1087       }
1088       N = nrow++ - 1;
1089       high++; /* added new column thus must search to one higher than before */
1090       /* shift up all the later entries in this row */
1091       for (ii=N; ii>=i; ii--) {
1092         rp[ii+1] = rp[ii];
1093         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1094       }
1095       if (N>=i) {
1096         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1097       }
1098       rp[i]                    = bcol;
1099       ap[16*i + 4*cidx + ridx] = value;
1100       noinsert1:;
1101       low = i;
1102     }
1103     ailen[brow] = nrow;
1104   }
1105   PetscFunctionReturnVoid();
1106 }
1107 EXTERN_C_END
1108 
1109 /*
1110      Checks for missing diagonals
1111 */
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1115 {
1116   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118   PetscInt       *diag,*jj = a->j,i;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1122   *missing = PETSC_FALSE;
1123   if (A->rmap->n > 0 && !jj) {
1124     *missing  = PETSC_TRUE;
1125     if (d) *d = 0;
1126     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1127   } else {
1128     diag     = a->diag;
1129     for (i=0; i<a->mbs; i++) {
1130       if (jj[diag[i]] != i) {
1131         *missing  = PETSC_TRUE;
1132         if (d) *d = i;
1133         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1134       }
1135     }
1136   }
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1143 {
1144   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscInt       i,j,m = a->mbs;
1147 
1148   PetscFunctionBegin;
1149   if (!a->diag) {
1150     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1151     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1152     a->free_diag = PETSC_TRUE;
1153   }
1154   for (i=0; i<m; i++) {
1155     a->diag[i] = a->i[i+1];
1156     for (j=a->i[i]; j<a->i[i+1]; j++) {
1157       if (a->j[j] == i) {
1158         a->diag[i] = j;
1159         break;
1160       }
1161     }
1162   }
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 
1167 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1171 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1172 {
1173   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1174   PetscErrorCode ierr;
1175   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1176   PetscInt       *tia, *tja;
1177 
1178   PetscFunctionBegin;
1179   *nn = n;
1180   if (!ia) PetscFunctionReturn(0);
1181   if (symmetric) {
1182     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1183   } else {
1184     tia = a->i; tja = a->j;
1185   }
1186 
1187   if (!blockcompressed && bs > 1) {
1188     (*nn) *= bs;
1189     nbs    = bs;
1190     /* malloc & create the natural set of indices */
1191     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1192     if (n) {
1193       (*ia)[0] = 0;
1194       for (j=1; j<bs; j++) {
1195         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1196       }
1197     }
1198 
1199     for (i=1; i<n; i++) {
1200       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1201       for (j=1; j<bs; j++) {
1202         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1203       }
1204     }
1205     if (n) {
1206       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1207     }
1208 
1209     if (ja) {
1210       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1211       cnt = 0;
1212       for (i=0; i<n; i++) {
1213         for (j=0; j<bs; j++) {
1214           for (k=tia[i]; k<tia[i+1]; k++) {
1215             for (l=0; l<bs; l++) {
1216               (*ja)[cnt++] = bs*tja[k] + l;
1217 	    }
1218           }
1219         }
1220       }
1221     }
1222 
1223     n     *= bs;
1224     nz *= bs*bs;
1225     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1226       ierr = PetscFree(tia);CHKERRQ(ierr);
1227       ierr = PetscFree(tja);CHKERRQ(ierr);
1228     }
1229   } else if (oshift == 1) {
1230     if (symmetric) {
1231       PetscInt nz = tia[A->rmap->n/bs];
1232       /*  add 1 to i and j indices */
1233       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1234       *ia = tia;
1235       if (ja) {
1236 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1237         *ja = tja;
1238       }
1239     } else {
1240       PetscInt nz = a->i[A->rmap->n/bs];
1241       /* malloc space and  add 1 to i and j indices */
1242       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1243       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1244       if (ja) {
1245 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1246 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1247       }
1248     }
1249   } else {
1250     *ia = tia;
1251     if (ja) *ja = tja;
1252   }
1253 
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1259 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1260 {
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   if (!ia) PetscFunctionReturn(0);
1265   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1266     ierr = PetscFree(*ia);CHKERRQ(ierr);
1267     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1274 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1275 {
1276   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1277   PetscErrorCode ierr;
1278 
1279   PetscFunctionBegin;
1280 #if defined(PETSC_USE_LOG)
1281   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1282 #endif
1283   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1284   if (a->row) {
1285     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1286   }
1287   if (a->col) {
1288     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1289   }
1290   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1291   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1292   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1293   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1294   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1295   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1296   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1297   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1298   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1299 
1300   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1301   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1302   ierr = PetscFree(a);CHKERRQ(ierr);
1303 
1304   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1318 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1319 {
1320   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   switch (op) {
1325   case MAT_ROW_ORIENTED:
1326     a->roworiented    = flg;
1327     break;
1328   case MAT_KEEP_NONZERO_PATTERN:
1329     a->keepnonzeropattern = flg;
1330     break;
1331   case MAT_NEW_NONZERO_LOCATIONS:
1332     a->nonew          = (flg ? 0 : 1);
1333     break;
1334   case MAT_NEW_NONZERO_LOCATION_ERR:
1335     a->nonew          = (flg ? -1 : 0);
1336     break;
1337   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1338     a->nonew          = (flg ? -2 : 0);
1339     break;
1340   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1341     a->nounused       = (flg ? -1 : 0);
1342     break;
1343   case MAT_NEW_DIAGONALS:
1344   case MAT_IGNORE_OFF_PROC_ENTRIES:
1345   case MAT_USE_HASH_TABLE:
1346     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1347     break;
1348   case MAT_SYMMETRIC:
1349   case MAT_STRUCTURALLY_SYMMETRIC:
1350   case MAT_HERMITIAN:
1351   case MAT_SYMMETRY_ETERNAL:
1352     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1353     break;
1354   default:
1355     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1362 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1363 {
1364   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1365   PetscErrorCode ierr;
1366   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1367   MatScalar      *aa,*aa_i;
1368   PetscScalar    *v_i;
1369 
1370   PetscFunctionBegin;
1371   bs  = A->rmap->bs;
1372   ai  = a->i;
1373   aj  = a->j;
1374   aa  = a->a;
1375   bs2 = a->bs2;
1376 
1377   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1378 
1379   bn  = row/bs;   /* Block number */
1380   bp  = row % bs; /* Block Position */
1381   M   = ai[bn+1] - ai[bn];
1382   *nz = bs*M;
1383 
1384   if (v) {
1385     *v = 0;
1386     if (*nz) {
1387       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1388       for (i=0; i<M; i++) { /* for each block in the block row */
1389         v_i  = *v + i*bs;
1390         aa_i = aa + bs2*(ai[bn] + i);
1391         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1392       }
1393     }
1394   }
1395 
1396   if (idx) {
1397     *idx = 0;
1398     if (*nz) {
1399       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1400       for (i=0; i<M; i++) { /* for each block in the block row */
1401         idx_i = *idx + i*bs;
1402         itmp  = bs*aj[ai[bn] + i];
1403         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1404       }
1405     }
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1412 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1413 {
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1418   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1424 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1425 {
1426   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1427   Mat            C;
1428   PetscErrorCode ierr;
1429   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1430   PetscInt       *rows,*cols,bs2=a->bs2;
1431   MatScalar      *array;
1432 
1433   PetscFunctionBegin;
1434   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1435   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1436     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1437     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1438 
1439     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1440     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1441     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1442     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1443     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1444     ierr = PetscFree(col);CHKERRQ(ierr);
1445   } else {
1446     C = *B;
1447   }
1448 
1449   array = a->a;
1450   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1451   cols = rows + bs;
1452   for (i=0; i<mbs; i++) {
1453     cols[0] = i*bs;
1454     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1455     len = ai[i+1] - ai[i];
1456     for (j=0; j<len; j++) {
1457       rows[0] = (*aj++)*bs;
1458       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1459       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1460       array += bs2;
1461     }
1462   }
1463   ierr = PetscFree(rows);CHKERRQ(ierr);
1464 
1465   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1466   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1467 
1468   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1469     *B = C;
1470   } else {
1471     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1478 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1479 {
1480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1481   PetscErrorCode ierr;
1482   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1483   int            fd;
1484   PetscScalar    *aa;
1485   FILE           *file;
1486 
1487   PetscFunctionBegin;
1488   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1489   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1490   col_lens[0] = MAT_FILE_COOKIE;
1491 
1492   col_lens[1] = A->rmap->N;
1493   col_lens[2] = A->cmap->n;
1494   col_lens[3] = a->nz*bs2;
1495 
1496   /* store lengths of each row and write (including header) to file */
1497   count = 0;
1498   for (i=0; i<a->mbs; i++) {
1499     for (j=0; j<bs; j++) {
1500       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1501     }
1502   }
1503   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1504   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1505 
1506   /* store column indices (zero start index) */
1507   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1508   count = 0;
1509   for (i=0; i<a->mbs; i++) {
1510     for (j=0; j<bs; j++) {
1511       for (k=a->i[i]; k<a->i[i+1]; k++) {
1512         for (l=0; l<bs; l++) {
1513           jj[count++] = bs*a->j[k] + l;
1514         }
1515       }
1516     }
1517   }
1518   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1519   ierr = PetscFree(jj);CHKERRQ(ierr);
1520 
1521   /* store nonzero values */
1522   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1523   count = 0;
1524   for (i=0; i<a->mbs; i++) {
1525     for (j=0; j<bs; j++) {
1526       for (k=a->i[i]; k<a->i[i+1]; k++) {
1527         for (l=0; l<bs; l++) {
1528           aa[count++] = a->a[bs2*k + l*bs + j];
1529         }
1530       }
1531     }
1532   }
1533   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1534   ierr = PetscFree(aa);CHKERRQ(ierr);
1535 
1536   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1537   if (file) {
1538     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1539   }
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1545 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1546 {
1547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1548   PetscErrorCode    ierr;
1549   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1550   PetscViewerFormat format;
1551 
1552   PetscFunctionBegin;
1553   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1554   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1555     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1556   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1557     Mat aij;
1558     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1559     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1560     ierr = MatDestroy(aij);CHKERRQ(ierr);
1561   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1562      PetscFunctionReturn(0);
1563   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1564     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1565     for (i=0; i<a->mbs; i++) {
1566       for (j=0; j<bs; j++) {
1567         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1568         for (k=a->i[i]; k<a->i[i+1]; k++) {
1569           for (l=0; l<bs; l++) {
1570 #if defined(PETSC_USE_COMPLEX)
1571             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1572               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1573                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1575               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1576                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1577             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1578               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1579             }
1580 #else
1581             if (a->a[bs2*k + l*bs + j] != 0.0) {
1582               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1583             }
1584 #endif
1585           }
1586         }
1587         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1588       }
1589     }
1590     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1591   } else {
1592     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1593     for (i=0; i<a->mbs; i++) {
1594       for (j=0; j<bs; j++) {
1595         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1596         for (k=a->i[i]; k<a->i[i+1]; k++) {
1597           for (l=0; l<bs; l++) {
1598 #if defined(PETSC_USE_COMPLEX)
1599             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1600               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1601                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1603               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1604                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1605             } else {
1606               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1607             }
1608 #else
1609             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1610 #endif
1611           }
1612         }
1613         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1614       }
1615     }
1616     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1617   }
1618   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1624 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1625 {
1626   Mat            A = (Mat) Aa;
1627   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1628   PetscErrorCode ierr;
1629   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1630   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1631   MatScalar      *aa;
1632   PetscViewer    viewer;
1633 
1634   PetscFunctionBegin;
1635 
1636   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1637   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1638 
1639   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1640 
1641   /* loop over matrix elements drawing boxes */
1642   color = PETSC_DRAW_BLUE;
1643   for (i=0,row=0; i<mbs; i++,row+=bs) {
1644     for (j=a->i[i]; j<a->i[i+1]; j++) {
1645       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1646       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1647       aa = a->a + j*bs2;
1648       for (k=0; k<bs; k++) {
1649         for (l=0; l<bs; l++) {
1650           if (PetscRealPart(*aa++) >=  0.) continue;
1651           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1652         }
1653       }
1654     }
1655   }
1656   color = PETSC_DRAW_CYAN;
1657   for (i=0,row=0; i<mbs; i++,row+=bs) {
1658     for (j=a->i[i]; j<a->i[i+1]; j++) {
1659       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1660       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1661       aa = a->a + j*bs2;
1662       for (k=0; k<bs; k++) {
1663         for (l=0; l<bs; l++) {
1664           if (PetscRealPart(*aa++) != 0.) continue;
1665           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1666         }
1667       }
1668     }
1669   }
1670 
1671   color = PETSC_DRAW_RED;
1672   for (i=0,row=0; i<mbs; i++,row+=bs) {
1673     for (j=a->i[i]; j<a->i[i+1]; j++) {
1674       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1675       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1676       aa = a->a + j*bs2;
1677       for (k=0; k<bs; k++) {
1678         for (l=0; l<bs; l++) {
1679           if (PetscRealPart(*aa++) <= 0.) continue;
1680           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1681         }
1682       }
1683     }
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1690 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1691 {
1692   PetscErrorCode ierr;
1693   PetscReal      xl,yl,xr,yr,w,h;
1694   PetscDraw      draw;
1695   PetscTruth     isnull;
1696 
1697   PetscFunctionBegin;
1698 
1699   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1700   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1701 
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1703   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1704   xr += w;    yr += h;  xl = -w;     yl = -h;
1705   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1706   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1707   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatView_SeqBAIJ"
1713 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1714 {
1715   PetscErrorCode ierr;
1716   PetscTruth     iascii,isbinary,isdraw;
1717 
1718   PetscFunctionBegin;
1719   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1720   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1722   if (iascii){
1723     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1724   } else if (isbinary) {
1725     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1726   } else if (isdraw) {
1727     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1728   } else {
1729     Mat B;
1730     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1731     ierr = MatView(B,viewer);CHKERRQ(ierr);
1732     ierr = MatDestroy(B);CHKERRQ(ierr);
1733   }
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1740 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1741 {
1742   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1743   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1744   PetscInt    *ai = a->i,*ailen = a->ilen;
1745   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1746   MatScalar   *ap,*aa = a->a;
1747 
1748   PetscFunctionBegin;
1749   for (k=0; k<m; k++) { /* loop over rows */
1750     row  = im[k]; brow = row/bs;
1751     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1752     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1753     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1754     nrow = ailen[brow];
1755     for (l=0; l<n; l++) { /* loop over columns */
1756       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1757       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1758       col  = in[l] ;
1759       bcol = col/bs;
1760       cidx = col%bs;
1761       ridx = row%bs;
1762       high = nrow;
1763       low  = 0; /* assume unsorted */
1764       while (high-low > 5) {
1765         t = (low+high)/2;
1766         if (rp[t] > bcol) high = t;
1767         else             low  = t;
1768       }
1769       for (i=low; i<high; i++) {
1770         if (rp[i] > bcol) break;
1771         if (rp[i] == bcol) {
1772           *v++ = ap[bs2*i+bs*cidx+ridx];
1773           goto finished;
1774         }
1775       }
1776       *v++ = 0.0;
1777       finished:;
1778     }
1779   }
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #define CHUNKSIZE 10
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1786 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1787 {
1788   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1789   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1790   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1791   PetscErrorCode    ierr;
1792   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1793   PetscTruth        roworiented=a->roworiented;
1794   const PetscScalar *value = v;
1795   MatScalar         *ap,*aa = a->a,*bap;
1796 
1797   PetscFunctionBegin;
1798   if (roworiented) {
1799     stepval = (n-1)*bs;
1800   } else {
1801     stepval = (m-1)*bs;
1802   }
1803   for (k=0; k<m; k++) { /* loop over added rows */
1804     row  = im[k];
1805     if (row < 0) continue;
1806 #if defined(PETSC_USE_DEBUG)
1807     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1808 #endif
1809     rp   = aj + ai[row];
1810     ap   = aa + bs2*ai[row];
1811     rmax = imax[row];
1812     nrow = ailen[row];
1813     low  = 0;
1814     high = nrow;
1815     for (l=0; l<n; l++) { /* loop over added columns */
1816       if (in[l] < 0) continue;
1817 #if defined(PETSC_USE_DEBUG)
1818       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1819 #endif
1820       col = in[l];
1821       if (roworiented) {
1822         value = v + k*(stepval+bs)*bs + l*bs;
1823       } else {
1824         value = v + l*(stepval+bs)*bs + k*bs;
1825       }
1826       if (col <= lastcol) low = 0; else high = nrow;
1827       lastcol = col;
1828       while (high-low > 7) {
1829         t = (low+high)/2;
1830         if (rp[t] > col) high = t;
1831         else             low  = t;
1832       }
1833       for (i=low; i<high; i++) {
1834         if (rp[i] > col) break;
1835         if (rp[i] == col) {
1836           bap  = ap +  bs2*i;
1837           if (roworiented) {
1838             if (is == ADD_VALUES) {
1839               for (ii=0; ii<bs; ii++,value+=stepval) {
1840                 for (jj=ii; jj<bs2; jj+=bs) {
1841                   bap[jj] += *value++;
1842                 }
1843               }
1844             } else {
1845               for (ii=0; ii<bs; ii++,value+=stepval) {
1846                 for (jj=ii; jj<bs2; jj+=bs) {
1847                   bap[jj] = *value++;
1848                 }
1849               }
1850             }
1851           } else {
1852             if (is == ADD_VALUES) {
1853               for (ii=0; ii<bs; ii++,value+=stepval) {
1854                 for (jj=0; jj<bs; jj++) {
1855                   *bap++ += *value++;
1856                 }
1857               }
1858             } else {
1859               for (ii=0; ii<bs; ii++,value+=stepval) {
1860                 for (jj=0; jj<bs; jj++) {
1861                   *bap++  = *value++;
1862                 }
1863               }
1864             }
1865           }
1866           goto noinsert2;
1867         }
1868       }
1869       if (nonew == 1) goto noinsert2;
1870       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1871       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1872       N = nrow++ - 1; high++;
1873       /* shift up all the later entries in this row */
1874       for (ii=N; ii>=i; ii--) {
1875         rp[ii+1] = rp[ii];
1876         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1877       }
1878       if (N >= i) {
1879         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1880       }
1881       rp[i] = col;
1882       bap   = ap +  bs2*i;
1883       if (roworiented) {
1884         for (ii=0; ii<bs; ii++,value+=stepval) {
1885           for (jj=ii; jj<bs2; jj+=bs) {
1886             bap[jj] = *value++;
1887           }
1888         }
1889       } else {
1890         for (ii=0; ii<bs; ii++,value+=stepval) {
1891           for (jj=0; jj<bs; jj++) {
1892             *bap++  = *value++;
1893           }
1894         }
1895       }
1896       noinsert2:;
1897       low = i;
1898     }
1899     ailen[row] = nrow;
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1907 {
1908   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1909   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1910   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1911   PetscErrorCode ierr;
1912   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1913   MatScalar      *aa = a->a,*ap;
1914   PetscReal      ratio=0.6;
1915 
1916   PetscFunctionBegin;
1917   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1918 
1919   if (m) rmax = ailen[0];
1920   for (i=1; i<mbs; i++) {
1921     /* move each row back by the amount of empty slots (fshift) before it*/
1922     fshift += imax[i-1] - ailen[i-1];
1923     rmax   = PetscMax(rmax,ailen[i]);
1924     if (fshift) {
1925       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1926       N = ailen[i];
1927       for (j=0; j<N; j++) {
1928         ip[j-fshift] = ip[j];
1929         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1930       }
1931     }
1932     ai[i] = ai[i-1] + ailen[i-1];
1933   }
1934   if (mbs) {
1935     fshift += imax[mbs-1] - ailen[mbs-1];
1936     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1937   }
1938   /* reset ilen and imax for each row */
1939   for (i=0; i<mbs; i++) {
1940     ailen[i] = imax[i] = ai[i+1] - ai[i];
1941   }
1942   a->nz = ai[mbs];
1943 
1944   /* diagonals may have moved, so kill the diagonal pointers */
1945   a->idiagvalid = PETSC_FALSE;
1946   if (fshift && a->diag) {
1947     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1948     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1949     a->diag = 0;
1950   }
1951   if (fshift && a->nounused == -1) {
1952     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1953   }
1954   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1955   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1956   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1957   a->reallocs          = 0;
1958   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1959 
1960   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1961   if (a->compressedrow.use){
1962     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1963   }
1964 
1965   A->same_nonzero = PETSC_TRUE;
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 /*
1970    This function returns an array of flags which indicate the locations of contiguous
1971    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1972    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1973    Assume: sizes should be long enough to hold all the values.
1974 */
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1977 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1978 {
1979   PetscInt   i,j,k,row;
1980   PetscTruth flg;
1981 
1982   PetscFunctionBegin;
1983   for (i=0,j=0; i<n; j++) {
1984     row = idx[i];
1985     if (row%bs!=0) { /* Not the begining of a block */
1986       sizes[j] = 1;
1987       i++;
1988     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1989       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1990       i++;
1991     } else { /* Begining of the block, so check if the complete block exists */
1992       flg = PETSC_TRUE;
1993       for (k=1; k<bs; k++) {
1994         if (row+k != idx[i+k]) { /* break in the block */
1995           flg = PETSC_FALSE;
1996           break;
1997         }
1998       }
1999       if (flg) { /* No break in the bs */
2000         sizes[j] = bs;
2001         i+= bs;
2002       } else {
2003         sizes[j] = 1;
2004         i++;
2005       }
2006     }
2007   }
2008   *bs_max = j;
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2014 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2015 {
2016   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2017   PetscErrorCode ierr;
2018   PetscInt       i,j,k,count,*rows;
2019   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2020   PetscScalar    zero = 0.0;
2021   MatScalar      *aa;
2022 
2023   PetscFunctionBegin;
2024   /* Make a copy of the IS and  sort it */
2025   /* allocate memory for rows,sizes */
2026   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2027   sizes = rows + is_n;
2028 
2029   /* copy IS values to rows, and sort them */
2030   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2031   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2032   if (baij->keepnonzeropattern) {
2033     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2034     bs_max = is_n;
2035     A->same_nonzero = PETSC_TRUE;
2036   } else {
2037     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2038     A->same_nonzero = PETSC_FALSE;
2039   }
2040 
2041   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2042     row   = rows[j];
2043     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2044     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2045     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2046     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2047       if (diag != 0.0) {
2048         if (baij->ilen[row/bs] > 0) {
2049           baij->ilen[row/bs]       = 1;
2050           baij->j[baij->i[row/bs]] = row/bs;
2051           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2052         }
2053         /* Now insert all the diagonal values for this bs */
2054         for (k=0; k<bs; k++) {
2055           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2056         }
2057       } else { /* (diag == 0.0) */
2058         baij->ilen[row/bs] = 0;
2059       } /* end (diag == 0.0) */
2060     } else { /* (sizes[i] != bs) */
2061 #if defined (PETSC_USE_DEBUG)
2062       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2063 #endif
2064       for (k=0; k<count; k++) {
2065         aa[0] =  zero;
2066         aa    += bs;
2067       }
2068       if (diag != 0.0) {
2069         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2070       }
2071     }
2072   }
2073 
2074   ierr = PetscFree(rows);CHKERRQ(ierr);
2075   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2081 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2082 {
2083   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2084   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2085   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2086   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2087   PetscErrorCode ierr;
2088   PetscInt       ridx,cidx,bs2=a->bs2;
2089   PetscTruth     roworiented=a->roworiented;
2090   MatScalar      *ap,value,*aa=a->a,*bap;
2091 
2092   PetscFunctionBegin;
2093   for (k=0; k<m; k++) { /* loop over added rows */
2094     row  = im[k];
2095     brow = row/bs;
2096     if (row < 0) continue;
2097 #if defined(PETSC_USE_DEBUG)
2098     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2099 #endif
2100     rp   = aj + ai[brow];
2101     ap   = aa + bs2*ai[brow];
2102     rmax = imax[brow];
2103     nrow = ailen[brow];
2104     low  = 0;
2105     high = nrow;
2106     for (l=0; l<n; l++) { /* loop over added columns */
2107       if (in[l] < 0) continue;
2108 #if defined(PETSC_USE_DEBUG)
2109       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2110 #endif
2111       col = in[l]; bcol = col/bs;
2112       ridx = row % bs; cidx = col % bs;
2113       if (roworiented) {
2114         value = v[l + k*n];
2115       } else {
2116         value = v[k + l*m];
2117       }
2118       if (col <= lastcol) low = 0; else high = nrow;
2119       lastcol = col;
2120       while (high-low > 7) {
2121         t = (low+high)/2;
2122         if (rp[t] > bcol) high = t;
2123         else              low  = t;
2124       }
2125       for (i=low; i<high; i++) {
2126         if (rp[i] > bcol) break;
2127         if (rp[i] == bcol) {
2128           bap  = ap +  bs2*i + bs*cidx + ridx;
2129           if (is == ADD_VALUES) *bap += value;
2130           else                  *bap  = value;
2131           goto noinsert1;
2132         }
2133       }
2134       if (nonew == 1) goto noinsert1;
2135       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2136       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2137       N = nrow++ - 1; high++;
2138       /* shift up all the later entries in this row */
2139       for (ii=N; ii>=i; ii--) {
2140         rp[ii+1] = rp[ii];
2141         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2142       }
2143       if (N>=i) {
2144         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2145       }
2146       rp[i]                      = bcol;
2147       ap[bs2*i + bs*cidx + ridx] = value;
2148       a->nz++;
2149       noinsert1:;
2150       low = i;
2151     }
2152     ailen[brow] = nrow;
2153   }
2154   A->same_nonzero = PETSC_FALSE;
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2162 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2163 {
2164   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2165   Mat            outA;
2166   PetscErrorCode ierr;
2167   PetscTruth     row_identity,col_identity;
2168 
2169   PetscFunctionBegin;
2170   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2171   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2172   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2173   if (!row_identity || !col_identity) {
2174     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2175   }
2176 
2177   outA          = inA;
2178   inA->factor   = MAT_FACTOR_LU;
2179 
2180   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2181 
2182   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2183   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2184   a->row = row;
2185   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2186   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2187   a->col = col;
2188 
2189   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2190   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2191   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2192 
2193   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2194   if (!a->solve_work) {
2195     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2196     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2197   }
2198   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2199 
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 EXTERN_C_BEGIN
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2206 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2207 {
2208   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2209   PetscInt    i,nz,mbs;
2210 
2211   PetscFunctionBegin;
2212   nz  = baij->maxnz/baij->bs2;
2213   mbs = baij->mbs;
2214   for (i=0; i<nz; i++) {
2215     baij->j[i] = indices[i];
2216   }
2217   baij->nz = nz;
2218   for (i=0; i<mbs; i++) {
2219     baij->ilen[i] = baij->imax[i];
2220   }
2221   PetscFunctionReturn(0);
2222 }
2223 EXTERN_C_END
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2227 /*@
2228     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2229        in the matrix.
2230 
2231   Input Parameters:
2232 +  mat - the SeqBAIJ matrix
2233 -  indices - the column indices
2234 
2235   Level: advanced
2236 
2237   Notes:
2238     This can be called if you have precomputed the nonzero structure of the
2239   matrix and want to provide it to the matrix object to improve the performance
2240   of the MatSetValues() operation.
2241 
2242     You MUST have set the correct numbers of nonzeros per row in the call to
2243   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2244 
2245     MUST be called before any calls to MatSetValues();
2246 
2247 @*/
2248 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2249 {
2250   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2251 
2252   PetscFunctionBegin;
2253   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2254   PetscValidPointer(indices,2);
2255   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2256   if (f) {
2257     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2258   } else {
2259     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2260   }
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2266 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2267 {
2268   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2269   PetscErrorCode ierr;
2270   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2271   PetscReal      atmp;
2272   PetscScalar    *x,zero = 0.0;
2273   MatScalar      *aa;
2274   PetscInt       ncols,brow,krow,kcol;
2275 
2276   PetscFunctionBegin;
2277   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2278   bs   = A->rmap->bs;
2279   aa   = a->a;
2280   ai   = a->i;
2281   aj   = a->j;
2282   mbs  = a->mbs;
2283 
2284   ierr = VecSet(v,zero);CHKERRQ(ierr);
2285   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2286   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2287   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2288   for (i=0; i<mbs; i++) {
2289     ncols = ai[1] - ai[0]; ai++;
2290     brow  = bs*i;
2291     for (j=0; j<ncols; j++){
2292       for (kcol=0; kcol<bs; kcol++){
2293         for (krow=0; krow<bs; krow++){
2294           atmp = PetscAbsScalar(*aa);aa++;
2295           row = brow + krow;    /* row index */
2296           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2297           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2298         }
2299       }
2300       aj++;
2301     }
2302   }
2303   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatCopy_SeqBAIJ"
2309 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2310 {
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   /* If the two matrices have the same copy implementation, use fast copy. */
2315   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2316     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2317     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2318 
2319     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2320       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2321     }
2322     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2323   } else {
2324     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2325   }
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2331 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2332 {
2333   PetscErrorCode ierr;
2334 
2335   PetscFunctionBegin;
2336   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2342 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2343 {
2344   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2345   PetscFunctionBegin;
2346   *array = a->a;
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2352 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2353 {
2354   PetscFunctionBegin;
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #include "petscblaslapack.h"
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2361 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2362 {
2363   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2364   PetscErrorCode ierr;
2365   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2366   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2367 
2368   PetscFunctionBegin;
2369   if (str == SAME_NONZERO_PATTERN) {
2370     PetscScalar alpha = a;
2371     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2372   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2373     if (y->xtoy && y->XtoY != X) {
2374       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2375       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2376     }
2377     if (!y->xtoy) { /* get xtoy */
2378       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2379       y->XtoY = X;
2380       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2381     }
2382     bs2 = bs*bs;
2383     for (i=0; i<x->nz; i++) {
2384       j = 0;
2385       while (j < bs2){
2386         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2387         j++;
2388       }
2389     }
2390     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2391   } else {
2392     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2393   }
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2399 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2400 {
2401   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2402   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2403   MatScalar      *aa = a->a;
2404 
2405   PetscFunctionBegin;
2406   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2412 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2413 {
2414   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2415   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2416   MatScalar      *aa = a->a;
2417 
2418   PetscFunctionBegin;
2419   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2427 /*
2428     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2429 */
2430 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2431 {
2432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2433   PetscErrorCode ierr;
2434   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2435   PetscInt       nz = a->i[m],row,*jj,mr,col;
2436 
2437   PetscFunctionBegin;
2438   *nn = n;
2439   if (!ia) PetscFunctionReturn(0);
2440   if (symmetric) {
2441     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2442   } else {
2443     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2444     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2445     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2446     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2447     jj = a->j;
2448     for (i=0; i<nz; i++) {
2449       collengths[jj[i]]++;
2450     }
2451     cia[0] = oshift;
2452     for (i=0; i<n; i++) {
2453       cia[i+1] = cia[i] + collengths[i];
2454     }
2455     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2456     jj   = a->j;
2457     for (row=0; row<m; row++) {
2458       mr = a->i[row+1] - a->i[row];
2459       for (i=0; i<mr; i++) {
2460         col = *jj++;
2461         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2462       }
2463     }
2464     ierr = PetscFree(collengths);CHKERRQ(ierr);
2465     *ia = cia; *ja = cja;
2466   }
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2472 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2473 {
2474   PetscErrorCode ierr;
2475 
2476   PetscFunctionBegin;
2477   if (!ia) PetscFunctionReturn(0);
2478   ierr = PetscFree(*ia);CHKERRQ(ierr);
2479   ierr = PetscFree(*ja);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2485 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2486 {
2487   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2488   PetscErrorCode ierr;
2489   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2490   PetscScalar    dx,*y,*xx,*w3_array;
2491   PetscScalar    *vscale_array;
2492   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2493   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2494   void           *fctx = coloring->fctx;
2495   PetscTruth     flg = PETSC_FALSE;
2496   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2497   Vec            x1_tmp;
2498 
2499   PetscFunctionBegin;
2500   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2501   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2502   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2503   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2504 
2505   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2506   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2507   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2508   if (flg) {
2509     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2510   } else {
2511     PetscTruth assembled;
2512     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2513     if (assembled) {
2514       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2515     }
2516   }
2517 
2518   x1_tmp = x1;
2519   if (!coloring->vscale){
2520     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2521   }
2522 
2523   /*
2524     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2525     coloring->F for the coarser grids from the finest
2526   */
2527   if (coloring->F) {
2528     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2529     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2530     if (m1 != m2) {
2531       coloring->F = 0;
2532       }
2533     }
2534 
2535   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2536     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2537   }
2538   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2539 
2540   /* Set w1 = F(x1) */
2541   if (coloring->F) {
2542     w1          = coloring->F; /* use already computed value of function */
2543     coloring->F = 0;
2544   } else {
2545     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2546     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2547     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2548   }
2549 
2550   if (!coloring->w3) {
2551     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2552     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2553   }
2554   w3 = coloring->w3;
2555 
2556     CHKMEMQ;
2557     /* Compute all the local scale factors, including ghost points */
2558   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2559   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2560   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2561   if (ctype == IS_COLORING_GHOSTED){
2562     col_start = 0; col_end = N;
2563   } else if (ctype == IS_COLORING_GLOBAL){
2564     xx = xx - start;
2565     vscale_array = vscale_array - start;
2566     col_start = start; col_end = N + start;
2567   }    CHKMEMQ;
2568   for (col=col_start; col<col_end; col++){
2569     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2570     if (coloring->htype[0] == 'w') {
2571       dx = 1.0 + unorm;
2572     } else {
2573       dx  = xx[col];
2574     }
2575     if (dx == 0.0) dx = 1.0;
2576 #if !defined(PETSC_USE_COMPLEX)
2577     if (dx < umin && dx >= 0.0)      dx = umin;
2578     else if (dx < 0.0 && dx > -umin) dx = -umin;
2579 #else
2580     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2581     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2582 #endif
2583     dx               *= epsilon;
2584     vscale_array[col] = 1.0/dx;
2585   }     CHKMEMQ;
2586   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2587   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2588   if (ctype == IS_COLORING_GLOBAL){
2589     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2590     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2591   }
2592         CHKMEMQ;
2593   if (coloring->vscaleforrow) {
2594     vscaleforrow = coloring->vscaleforrow;
2595   } else {
2596     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2597   }
2598 
2599 
2600   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2601   /*
2602     Loop over each color
2603   */
2604   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2605   for (k=0; k<coloring->ncolors; k++) {
2606     coloring->currentcolor = k;
2607     for (i=0; i<bs; i++) {
2608       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2609       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2610       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2611       /*
2612 	Loop over each column associated with color
2613 	adding the perturbation to the vector w3.
2614       */
2615       for (l=0; l<coloring->ncolumns[k]; l++) {
2616 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2617 	if (coloring->htype[0] == 'w') {
2618 	  dx = 1.0 + unorm;
2619 	} else {
2620 	  dx  = xx[col];
2621 	}
2622 	if (dx == 0.0) dx = 1.0;
2623 #if !defined(PETSC_USE_COMPLEX)
2624 	if (dx < umin && dx >= 0.0)      dx = umin;
2625 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2626 #else
2627 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2628 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2629 #endif
2630 	dx            *= epsilon;
2631 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2632 	w3_array[col] += dx;
2633       }
2634       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2635       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2636 
2637       /*
2638 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2639 	w2 = F(x1 + dx) - F(x1)
2640       */
2641       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2642       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2643       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2644       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2645 
2646       /*
2647 	Loop over rows of vector, putting results into Jacobian matrix
2648       */
2649       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2650       for (l=0; l<coloring->nrows[k]; l++) {
2651 	row    = bs*coloring->rows[k][l];             /* local row index */
2652 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2653         for (j=0; j<bs; j++) {
2654   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2655           srows[j]  = row + start + j;
2656         }
2657 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2658       }
2659       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2660     }
2661   } /* endof for each color */
2662   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2663   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2664   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2665   ierr = PetscFree(srows);CHKERRQ(ierr);
2666 
2667   coloring->currentcolor = -1;
2668   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2669   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2670   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 /* -------------------------------------------------------------------*/
2675 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2676        MatGetRow_SeqBAIJ,
2677        MatRestoreRow_SeqBAIJ,
2678        MatMult_SeqBAIJ_N,
2679 /* 4*/ MatMultAdd_SeqBAIJ_N,
2680        MatMultTranspose_SeqBAIJ,
2681        MatMultTransposeAdd_SeqBAIJ,
2682        0,
2683        0,
2684        0,
2685 /*10*/ 0,
2686        MatLUFactor_SeqBAIJ,
2687        0,
2688        0,
2689        MatTranspose_SeqBAIJ,
2690 /*15*/ MatGetInfo_SeqBAIJ,
2691        MatEqual_SeqBAIJ,
2692        MatGetDiagonal_SeqBAIJ,
2693        MatDiagonalScale_SeqBAIJ,
2694        MatNorm_SeqBAIJ,
2695 /*20*/ 0,
2696        MatAssemblyEnd_SeqBAIJ,
2697        MatSetOption_SeqBAIJ,
2698        MatZeroEntries_SeqBAIJ,
2699 /*24*/ MatZeroRows_SeqBAIJ,
2700        0,
2701        0,
2702        0,
2703        0,
2704 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2705        0,
2706        0,
2707        MatGetArray_SeqBAIJ,
2708        MatRestoreArray_SeqBAIJ,
2709 /*34*/ MatDuplicate_SeqBAIJ,
2710        0,
2711        0,
2712        MatILUFactor_SeqBAIJ,
2713        0,
2714 /*39*/ MatAXPY_SeqBAIJ,
2715        MatGetSubMatrices_SeqBAIJ,
2716        MatIncreaseOverlap_SeqBAIJ,
2717        MatGetValues_SeqBAIJ,
2718        MatCopy_SeqBAIJ,
2719 /*44*/ 0,
2720        MatScale_SeqBAIJ,
2721        0,
2722        0,
2723        MatILUDTFactor_SeqBAIJ,
2724 /*49*/ 0,
2725        MatGetRowIJ_SeqBAIJ,
2726        MatRestoreRowIJ_SeqBAIJ,
2727        MatGetColumnIJ_SeqBAIJ,
2728        MatRestoreColumnIJ_SeqBAIJ,
2729 /*54*/ MatFDColoringCreate_SeqAIJ,
2730        0,
2731        0,
2732        0,
2733        MatSetValuesBlocked_SeqBAIJ,
2734 /*59*/ MatGetSubMatrix_SeqBAIJ,
2735        MatDestroy_SeqBAIJ,
2736        MatView_SeqBAIJ,
2737        0,
2738        0,
2739 /*64*/ 0,
2740        0,
2741        0,
2742        0,
2743        0,
2744 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2745        0,
2746        MatConvert_Basic,
2747        0,
2748        0,
2749 /*74*/ 0,
2750        MatFDColoringApply_BAIJ,
2751        0,
2752        0,
2753        0,
2754 /*79*/ 0,
2755        0,
2756        0,
2757        0,
2758        MatLoad_SeqBAIJ,
2759 /*84*/ 0,
2760        0,
2761        0,
2762        0,
2763        0,
2764 /*89*/ 0,
2765        0,
2766        0,
2767        0,
2768        0,
2769 /*94*/ 0,
2770        0,
2771        0,
2772        0,
2773        0,
2774 /*99*/0,
2775        0,
2776        0,
2777        0,
2778        0,
2779 /*104*/0,
2780        MatRealPart_SeqBAIJ,
2781        MatImaginaryPart_SeqBAIJ,
2782        0,
2783        0,
2784 /*109*/0,
2785        0,
2786        0,
2787        0,
2788        MatMissingDiagonal_SeqBAIJ
2789 /*114*/
2790 };
2791 
2792 EXTERN_C_BEGIN
2793 #undef __FUNCT__
2794 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2795 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2796 {
2797   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2798   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2799   PetscErrorCode ierr;
2800 
2801   PetscFunctionBegin;
2802   if (aij->nonew != 1) {
2803     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2804   }
2805 
2806   /* allocate space for values if not already there */
2807   if (!aij->saved_values) {
2808     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2809   }
2810 
2811   /* copy values over */
2812   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2813   PetscFunctionReturn(0);
2814 }
2815 EXTERN_C_END
2816 
2817 EXTERN_C_BEGIN
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2820 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2821 {
2822   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2823   PetscErrorCode ierr;
2824   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2825 
2826   PetscFunctionBegin;
2827   if (aij->nonew != 1) {
2828     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2829   }
2830   if (!aij->saved_values) {
2831     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2832   }
2833 
2834   /* copy values over */
2835   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2836   PetscFunctionReturn(0);
2837 }
2838 EXTERN_C_END
2839 
2840 EXTERN_C_BEGIN
2841 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2842 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2843 EXTERN_C_END
2844 
2845 EXTERN_C_BEGIN
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2848 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2849 {
2850   Mat_SeqBAIJ    *b;
2851   PetscErrorCode ierr;
2852   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2853   PetscTruth     flg,skipallocation = PETSC_FALSE;
2854 
2855   PetscFunctionBegin;
2856 
2857   if (nz == MAT_SKIP_ALLOCATION) {
2858     skipallocation = PETSC_TRUE;
2859     nz             = 0;
2860   }
2861 
2862   if (bs < 0) {
2863     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2864       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2865     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2866     bs   = PetscAbs(bs);
2867   }
2868   if (nnz && newbs != bs) {
2869     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2870   }
2871   bs   = newbs;
2872 
2873   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2874   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2875   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2876   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2877 
2878   B->preallocated = PETSC_TRUE;
2879 
2880   mbs  = B->rmap->n/bs;
2881   nbs  = B->cmap->n/bs;
2882   bs2  = bs*bs;
2883 
2884   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2885     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2886   }
2887 
2888   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2889   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2890   if (nnz) {
2891     for (i=0; i<mbs; i++) {
2892       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2893       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2894     }
2895   }
2896 
2897   b       = (Mat_SeqBAIJ*)B->data;
2898   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2899     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2900   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2901 
2902   if (!flg) {
2903     switch (bs) {
2904     case 1:
2905       B->ops->mult            = MatMult_SeqBAIJ_1;
2906       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2907       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2908       break;
2909     case 2:
2910       B->ops->mult            = MatMult_SeqBAIJ_2;
2911       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2912       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2913       break;
2914     case 3:
2915       B->ops->mult            = MatMult_SeqBAIJ_3;
2916       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2917       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2918       break;
2919     case 4:
2920       B->ops->mult            = MatMult_SeqBAIJ_4;
2921       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2922       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2923       break;
2924     case 5:
2925       B->ops->mult            = MatMult_SeqBAIJ_5;
2926       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2927       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2928       break;
2929     case 6:
2930       B->ops->mult            = MatMult_SeqBAIJ_6;
2931       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2932       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2933       break;
2934     case 7:
2935       B->ops->mult            = MatMult_SeqBAIJ_7;
2936       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2937       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2938       break;
2939     default:
2940       B->ops->mult            = MatMult_SeqBAIJ_N;
2941       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2942       break;
2943     }
2944   }
2945   B->rmap->bs      = bs;
2946   b->mbs     = mbs;
2947   b->nbs     = nbs;
2948   if (!skipallocation) {
2949     if (!b->imax) {
2950       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2951       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2952       b->free_imax_ilen = PETSC_TRUE;
2953     }
2954     /* b->ilen will count nonzeros in each block row so far. */
2955     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2956     if (!nnz) {
2957       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2958       else if (nz <= 0)        nz = 1;
2959       for (i=0; i<mbs; i++) b->imax[i] = nz;
2960       nz = nz*mbs;
2961     } else {
2962       nz = 0;
2963       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2964     }
2965 
2966     /* allocate the matrix space */
2967     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2968     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2969     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2970     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2971     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2972     b->singlemalloc = PETSC_TRUE;
2973     b->i[0] = 0;
2974     for (i=1; i<mbs+1; i++) {
2975       b->i[i] = b->i[i-1] + b->imax[i-1];
2976     }
2977     b->free_a     = PETSC_TRUE;
2978     b->free_ij    = PETSC_TRUE;
2979   } else {
2980     b->free_a     = PETSC_FALSE;
2981     b->free_ij    = PETSC_FALSE;
2982   }
2983 
2984   B->rmap->bs          = bs;
2985   b->bs2              = bs2;
2986   b->mbs              = mbs;
2987   b->nz               = 0;
2988   b->maxnz            = nz*bs2;
2989   B->info.nz_unneeded = (PetscReal)b->maxnz;
2990   PetscFunctionReturn(0);
2991 }
2992 EXTERN_C_END
2993 
2994 EXTERN_C_BEGIN
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2997 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2998 {
2999   PetscInt       i,m,nz,nz_max=0,*nnz;
3000   PetscScalar    *values=0;
3001   PetscErrorCode ierr;
3002 
3003   PetscFunctionBegin;
3004 
3005   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3006 
3007   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3008   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3009   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3010   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3011   m = B->rmap->n/bs;
3012 
3013   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3014   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3015   for(i=0; i<m; i++) {
3016     nz = ii[i+1]- ii[i];
3017     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3018     nz_max = PetscMax(nz_max, nz);
3019     nnz[i] = nz;
3020   }
3021   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3022   ierr = PetscFree(nnz);CHKERRQ(ierr);
3023 
3024   values = (PetscScalar*)V;
3025   if (!values) {
3026     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3027     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3028   }
3029   for (i=0; i<m; i++) {
3030     PetscInt          ncols  = ii[i+1] - ii[i];
3031     const PetscInt    *icols = jj + ii[i];
3032     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3033     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3034   }
3035   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3036   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3037   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3038 
3039   PetscFunctionReturn(0);
3040 }
3041 EXTERN_C_END
3042 
3043 
3044 EXTERN_C_BEGIN
3045 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3046 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3047 EXTERN_C_END
3048 
3049 /*MC
3050    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3051    block sparse compressed row format.
3052 
3053    Options Database Keys:
3054 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3055 
3056   Level: beginner
3057 
3058 .seealso: MatCreateSeqBAIJ()
3059 M*/
3060 
3061 
3062 EXTERN_C_BEGIN
3063 #undef __FUNCT__
3064 #define __FUNCT__ "MatCreate_SeqBAIJ"
3065 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3066 {
3067   PetscErrorCode ierr;
3068   PetscMPIInt    size;
3069   Mat_SeqBAIJ    *b;
3070 
3071   PetscFunctionBegin;
3072   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3073   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3074 
3075   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3076   B->data = (void*)b;
3077   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3078   B->mapping               = 0;
3079   b->row                   = 0;
3080   b->col                   = 0;
3081   b->icol                  = 0;
3082   b->reallocs              = 0;
3083   b->saved_values          = 0;
3084 
3085   b->roworiented           = PETSC_TRUE;
3086   b->nonew                 = 0;
3087   b->diag                  = 0;
3088   b->solve_work            = 0;
3089   b->mult_work             = 0;
3090   B->spptr                 = 0;
3091   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3092   b->keepnonzeropattern    = PETSC_FALSE;
3093   b->xtoy                  = 0;
3094   b->XtoY                  = 0;
3095   b->compressedrow.use     = PETSC_FALSE;
3096   b->compressedrow.nrows   = 0;
3097   b->compressedrow.i       = PETSC_NULL;
3098   b->compressedrow.rindex  = PETSC_NULL;
3099   b->compressedrow.checked = PETSC_FALSE;
3100   B->same_nonzero          = PETSC_FALSE;
3101 
3102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
3103                                      "MatGetFactorAvailable_seqbaij_petsc",
3104                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
3106                                      "MatGetFactor_seqbaij_petsc",
3107                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3109                                      "MatInvertBlockDiagonal_SeqBAIJ",
3110                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3112                                      "MatStoreValues_SeqBAIJ",
3113                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3115                                      "MatRetrieveValues_SeqBAIJ",
3116                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3118                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3119                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3120   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3121                                      "MatConvert_SeqBAIJ_SeqAIJ",
3122                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3123   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3124                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3125                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3126   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3127                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3128                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3129   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3130                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3131                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3132   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3133   PetscFunctionReturn(0);
3134 }
3135 EXTERN_C_END
3136 
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3139 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3140 {
3141   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3142   PetscErrorCode ierr;
3143   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3144 
3145   PetscFunctionBegin;
3146   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3147 
3148   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3149     c->imax = a->imax;
3150     c->ilen = a->ilen;
3151     c->free_imax_ilen = PETSC_FALSE;
3152   } else {
3153     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3154     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3155     for (i=0; i<mbs; i++) {
3156       c->imax[i] = a->imax[i];
3157       c->ilen[i] = a->ilen[i];
3158     }
3159     c->free_imax_ilen = PETSC_TRUE;
3160   }
3161 
3162   /* allocate the matrix space */
3163   if (mallocmatspace){
3164     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3165       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3166       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3167       c->singlemalloc = PETSC_FALSE;
3168       c->free_ij      = PETSC_FALSE;
3169       c->i            = a->i;
3170       c->j            = a->j;
3171       c->parent       = A;
3172       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3173       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3174       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3175     } else {
3176       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3177       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3178       c->singlemalloc = PETSC_TRUE;
3179       c->free_ij      = PETSC_TRUE;
3180       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3181       if (mbs > 0) {
3182 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3183 	if (cpvalues == MAT_COPY_VALUES) {
3184 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3185 	} else {
3186 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3187 	}
3188       }
3189     }
3190   }
3191 
3192   c->roworiented = a->roworiented;
3193   c->nonew       = a->nonew;
3194   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
3195   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
3196   c->bs2         = a->bs2;
3197   c->mbs         = a->mbs;
3198   c->nbs         = a->nbs;
3199 
3200   if (a->diag) {
3201     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3202       c->diag      = a->diag;
3203       c->free_diag = PETSC_FALSE;
3204     } else {
3205       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3206       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3207       for (i=0; i<mbs; i++) {
3208         c->diag[i] = a->diag[i];
3209       }
3210       c->free_diag = PETSC_TRUE;
3211     }
3212   } else c->diag        = 0;
3213   c->nz                 = a->nz;
3214   c->maxnz              = a->maxnz;
3215   c->solve_work         = 0;
3216   c->mult_work          = 0;
3217   c->free_a             = PETSC_TRUE;
3218   c->free_ij            = PETSC_TRUE;
3219   C->preallocated       = PETSC_TRUE;
3220   C->assembled          = PETSC_TRUE;
3221 
3222   c->compressedrow.use     = a->compressedrow.use;
3223   c->compressedrow.nrows   = a->compressedrow.nrows;
3224   c->compressedrow.checked = a->compressedrow.checked;
3225   if ( a->compressedrow.checked && a->compressedrow.use){
3226     i = a->compressedrow.nrows;
3227     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
3228     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3229     c->compressedrow.rindex = c->compressedrow.i + i + 1;
3230     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3231     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3232   } else {
3233     c->compressedrow.use    = PETSC_FALSE;
3234     c->compressedrow.i      = PETSC_NULL;
3235     c->compressedrow.rindex = PETSC_NULL;
3236   }
3237   C->same_nonzero = A->same_nonzero;
3238   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3244 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3245 {
3246     PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3250   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3251   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3252   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3253   PetscFunctionReturn(0);
3254 }
3255 
3256 #undef __FUNCT__
3257 #define __FUNCT__ "MatLoad_SeqBAIJ"
3258 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3259 {
3260   Mat_SeqBAIJ    *a;
3261   Mat            B;
3262   PetscErrorCode ierr;
3263   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3264   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3265   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3266   PetscInt       *masked,nmask,tmp,bs2,ishift;
3267   PetscMPIInt    size;
3268   int            fd;
3269   PetscScalar    *aa;
3270   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3271 
3272   PetscFunctionBegin;
3273   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3274     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3275   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3276   bs2  = bs*bs;
3277 
3278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3279   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3280   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3281   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3282   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3283   M = header[1]; N = header[2]; nz = header[3];
3284 
3285   if (header[3] < 0) {
3286     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3287   }
3288 
3289   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3290 
3291   /*
3292      This code adds extra rows to make sure the number of rows is
3293     divisible by the blocksize
3294   */
3295   mbs        = M/bs;
3296   extra_rows = bs - M + bs*(mbs);
3297   if (extra_rows == bs) extra_rows = 0;
3298   else                  mbs++;
3299   if (extra_rows) {
3300     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3301   }
3302 
3303   /* read in row lengths */
3304   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3305   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3306   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3307 
3308   /* read in column indices */
3309   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3310   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3311   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3312 
3313   /* loop over row lengths determining block row lengths */
3314   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3315   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3316   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3317   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3318   masked   = mask + mbs;
3319   rowcount = 0; nzcount = 0;
3320   for (i=0; i<mbs; i++) {
3321     nmask = 0;
3322     for (j=0; j<bs; j++) {
3323       kmax = rowlengths[rowcount];
3324       for (k=0; k<kmax; k++) {
3325         tmp = jj[nzcount++]/bs;
3326         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3327       }
3328       rowcount++;
3329     }
3330     browlengths[i] += nmask;
3331     /* zero out the mask elements we set */
3332     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3333   }
3334 
3335   /* create our matrix */
3336   ierr = MatCreate(comm,&B);
3337   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3338   ierr = MatSetType(B,type);CHKERRQ(ierr);
3339   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3340   a = (Mat_SeqBAIJ*)B->data;
3341 
3342   /* set matrix "i" values */
3343   a->i[0] = 0;
3344   for (i=1; i<= mbs; i++) {
3345     a->i[i]      = a->i[i-1] + browlengths[i-1];
3346     a->ilen[i-1] = browlengths[i-1];
3347   }
3348   a->nz         = 0;
3349   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3350 
3351   /* read in nonzero values */
3352   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3353   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3354   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3355 
3356   /* set "a" and "j" values into matrix */
3357   nzcount = 0; jcount = 0;
3358   for (i=0; i<mbs; i++) {
3359     nzcountb = nzcount;
3360     nmask    = 0;
3361     for (j=0; j<bs; j++) {
3362       kmax = rowlengths[i*bs+j];
3363       for (k=0; k<kmax; k++) {
3364         tmp = jj[nzcount++]/bs;
3365 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3366       }
3367     }
3368     /* sort the masked values */
3369     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3370 
3371     /* set "j" values into matrix */
3372     maskcount = 1;
3373     for (j=0; j<nmask; j++) {
3374       a->j[jcount++]  = masked[j];
3375       mask[masked[j]] = maskcount++;
3376     }
3377     /* set "a" values into matrix */
3378     ishift = bs2*a->i[i];
3379     for (j=0; j<bs; j++) {
3380       kmax = rowlengths[i*bs+j];
3381       for (k=0; k<kmax; k++) {
3382         tmp       = jj[nzcountb]/bs ;
3383         block     = mask[tmp] - 1;
3384         point     = jj[nzcountb] - bs*tmp;
3385         idx       = ishift + bs2*block + j + bs*point;
3386         a->a[idx] = (MatScalar)aa[nzcountb++];
3387       }
3388     }
3389     /* zero out the mask elements we set */
3390     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3391   }
3392   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3393 
3394   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3395   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3396   ierr = PetscFree(aa);CHKERRQ(ierr);
3397   ierr = PetscFree(jj);CHKERRQ(ierr);
3398   ierr = PetscFree(mask);CHKERRQ(ierr);
3399 
3400   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3401   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3402   ierr = MatView_Private(B);CHKERRQ(ierr);
3403 
3404   *A = B;
3405   PetscFunctionReturn(0);
3406 }
3407 
3408 #undef __FUNCT__
3409 #define __FUNCT__ "MatCreateSeqBAIJ"
3410 /*@C
3411    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3412    compressed row) format.  For good matrix assembly performance the
3413    user should preallocate the matrix storage by setting the parameter nz
3414    (or the array nnz).  By setting these parameters accurately, performance
3415    during matrix assembly can be increased by more than a factor of 50.
3416 
3417    Collective on MPI_Comm
3418 
3419    Input Parameters:
3420 +  comm - MPI communicator, set to PETSC_COMM_SELF
3421 .  bs - size of block
3422 .  m - number of rows
3423 .  n - number of columns
3424 .  nz - number of nonzero blocks  per block row (same for all rows)
3425 -  nnz - array containing the number of nonzero blocks in the various block rows
3426          (possibly different for each block row) or PETSC_NULL
3427 
3428    Output Parameter:
3429 .  A - the matrix
3430 
3431    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3432    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3433    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3434 
3435    Options Database Keys:
3436 .   -mat_no_unroll - uses code that does not unroll the loops in the
3437                      block calculations (much slower)
3438 .    -mat_block_size - size of the blocks to use
3439 
3440    Level: intermediate
3441 
3442    Notes:
3443    The number of rows and columns must be divisible by blocksize.
3444 
3445    If the nnz parameter is given then the nz parameter is ignored
3446 
3447    A nonzero block is any block that as 1 or more nonzeros in it
3448 
3449    The block AIJ format is fully compatible with standard Fortran 77
3450    storage.  That is, the stored row and column indices can begin at
3451    either one (as in Fortran) or zero.  See the users' manual for details.
3452 
3453    Specify the preallocated storage with either nz or nnz (not both).
3454    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3455    allocation.  For additional details, see the users manual chapter on
3456    matrices.
3457 
3458 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3459 @*/
3460 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3461 {
3462   PetscErrorCode ierr;
3463 
3464   PetscFunctionBegin;
3465   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3466   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3467   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3468   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3469   PetscFunctionReturn(0);
3470 }
3471 
3472 #undef __FUNCT__
3473 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3474 /*@C
3475    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3476    per row in the matrix. For good matrix assembly performance the
3477    user should preallocate the matrix storage by setting the parameter nz
3478    (or the array nnz).  By setting these parameters accurately, performance
3479    during matrix assembly can be increased by more than a factor of 50.
3480 
3481    Collective on MPI_Comm
3482 
3483    Input Parameters:
3484 +  A - the matrix
3485 .  bs - size of block
3486 .  nz - number of block nonzeros per block row (same for all rows)
3487 -  nnz - array containing the number of block nonzeros in the various block rows
3488          (possibly different for each block row) or PETSC_NULL
3489 
3490    Options Database Keys:
3491 .   -mat_no_unroll - uses code that does not unroll the loops in the
3492                      block calculations (much slower)
3493 .    -mat_block_size - size of the blocks to use
3494 
3495    Level: intermediate
3496 
3497    Notes:
3498    If the nnz parameter is given then the nz parameter is ignored
3499 
3500    You can call MatGetInfo() to get information on how effective the preallocation was;
3501    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3502    You can also run with the option -info and look for messages with the string
3503    malloc in them to see if additional memory allocation was needed.
3504 
3505    The block AIJ format is fully compatible with standard Fortran 77
3506    storage.  That is, the stored row and column indices can begin at
3507    either one (as in Fortran) or zero.  See the users' manual for details.
3508 
3509    Specify the preallocated storage with either nz or nnz (not both).
3510    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3511    allocation.  For additional details, see the users manual chapter on
3512    matrices.
3513 
3514 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3515 @*/
3516 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3517 {
3518   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3519 
3520   PetscFunctionBegin;
3521   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3522   if (f) {
3523     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3524   }
3525   PetscFunctionReturn(0);
3526 }
3527 
3528 #undef __FUNCT__
3529 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3530 /*@C
3531    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3532    (the default sequential PETSc format).
3533 
3534    Collective on MPI_Comm
3535 
3536    Input Parameters:
3537 +  A - the matrix
3538 .  i - the indices into j for the start of each local row (starts with zero)
3539 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3540 -  v - optional values in the matrix
3541 
3542    Level: developer
3543 
3544 .keywords: matrix, aij, compressed row, sparse
3545 
3546 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3547 @*/
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3549 {
3550   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3551 
3552   PetscFunctionBegin;
3553   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3554   if (f) {
3555     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3556   }
3557   PetscFunctionReturn(0);
3558 }
3559 
3560 
3561 #undef __FUNCT__
3562 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3563 /*@
3564      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3565               (upper triangular entries in CSR format) provided by the user.
3566 
3567      Collective on MPI_Comm
3568 
3569    Input Parameters:
3570 +  comm - must be an MPI communicator of size 1
3571 .  bs - size of block
3572 .  m - number of rows
3573 .  n - number of columns
3574 .  i - row indices
3575 .  j - column indices
3576 -  a - matrix values
3577 
3578    Output Parameter:
3579 .  mat - the matrix
3580 
3581    Level: intermediate
3582 
3583    Notes:
3584        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3585     once the matrix is destroyed
3586 
3587        You cannot set new nonzero locations into this matrix, that will generate an error.
3588 
3589        The i and j indices are 0 based
3590 
3591 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3592 
3593 @*/
3594 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3595 {
3596   PetscErrorCode ierr;
3597   PetscInt       ii;
3598   Mat_SeqBAIJ    *baij;
3599 
3600   PetscFunctionBegin;
3601   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3602   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3603 
3604   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3605   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3606   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3607   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3608   baij = (Mat_SeqBAIJ*)(*mat)->data;
3609   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3610 
3611   baij->i = i;
3612   baij->j = j;
3613   baij->a = a;
3614   baij->singlemalloc = PETSC_FALSE;
3615   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3616   baij->free_a       = PETSC_FALSE;
3617   baij->free_ij       = PETSC_FALSE;
3618 
3619   for (ii=0; ii<m; ii++) {
3620     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3621 #if defined(PETSC_USE_DEBUG)
3622     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3623 #endif
3624   }
3625 #if defined(PETSC_USE_DEBUG)
3626   for (ii=0; ii<baij->i[m]; ii++) {
3627     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3628     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3629   }
3630 #endif
3631 
3632   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634   PetscFunctionReturn(0);
3635 }
3636