xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3acb87959449f26430bfb44c593b059e1dfb39fd)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,idx;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         while (nz--) {
175           idx  = (*vi++);
176           x1   = x[idx];
177           s1  -= v[0]*x1;
178           v   += 1;
179         }
180         x[i2]   = idiag[0]*s1;
181         idiag   += 1;
182         i2      += 1;
183       }
184       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
185       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
186     }
187     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
188         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
189       i2    = 0;
190       mdiag = a->idiag+a->mbs;
191       for (i=0; i<m; i++) {
192         x1      = x[i2];
193         x[i2]   = mdiag[0]*x1;
194         mdiag  += 1;
195         i2     += 1;
196       }
197       ierr = PetscLogFlops(m);CHKERRQ(ierr);
198     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
199       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
200     }
201     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
202       idiag   = a->idiag+a->mbs - 1;
203       i2      = m - 1;
204       x1      = x[i2];
205       x[i2]   = idiag[0]*x1;
206       idiag -= 1;
207       i2    -= 1;
208       for (i=m-2; i>=0; i--) {
209         v     = aa + (diag[i]+1);
210         vi    = aj + diag[i] + 1;
211         nz    = ai[i+1] - diag[i] - 1;
212         s1    = x[i2];
213         while (nz--) {
214           idx  = (*vi++);
215           x1   = x[idx];
216           s1  -= v[0]*x1;
217           v   += 1;
218         }
219         x[i2]   = idiag[0]*s1;
220         idiag   -= 1;
221         i2      -= 1;
222       }
223       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
224     }
225   } else {
226     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
227   }
228   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
229   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
236 {
237   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar        *x,x1,x2,s1,s2;
239   const PetscScalar  *b;
240   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
241   PetscErrorCode     ierr;
242   PetscInt           m = a->mbs,i,i2,nz,idx;
243   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
244 
245   PetscFunctionBegin;
246   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
247   its = its*lits;
248   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
249   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
250   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
251   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
252   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
253 
254   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
255 
256   diag  = a->diag;
257   idiag = a->idiag;
258   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
259   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
260 
261   if (flag & SOR_ZERO_INITIAL_GUESS) {
262     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
263       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
264       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
265       i2     = 2;
266       idiag += 4;
267       for (i=1; i<m; i++) {
268 	v     = aa + 4*ai[i];
269 	vi    = aj + ai[i];
270 	nz    = diag[i] - ai[i];
271 	s1    = b[i2]; s2 = b[i2+1];
272 	while (nz--) {
273 	  idx  = 2*(*vi++);
274 	  x1   = x[idx]; x2 = x[1+idx];
275 	  s1  -= v[0]*x1 + v[2]*x2;
276 	  s2  -= v[1]*x1 + v[3]*x2;
277 	  v   += 4;
278 	}
279 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
280 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
281         idiag   += 4;
282         i2      += 2;
283       }
284       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
285       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
286     }
287     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
288         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
289       i2    = 0;
290       mdiag = a->idiag+4*a->mbs;
291       for (i=0; i<m; i++) {
292         x1      = x[i2]; x2 = x[i2+1];
293         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
294         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
295         mdiag  += 4;
296         i2     += 2;
297       }
298       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
299     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
300       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
301     }
302     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
303       idiag   = a->idiag+4*a->mbs - 4;
304       i2      = 2*m - 2;
305       x1      = x[i2]; x2 = x[i2+1];
306       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
307       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
308       idiag -= 4;
309       i2    -= 2;
310       for (i=m-2; i>=0; i--) {
311 	v     = aa + 4*(diag[i]+1);
312 	vi    = aj + diag[i] + 1;
313 	nz    = ai[i+1] - diag[i] - 1;
314 	s1    = x[i2]; s2 = x[i2+1];
315 	while (nz--) {
316 	  idx  = 2*(*vi++);
317 	  x1   = x[idx]; x2 = x[1+idx];
318 	  s1  -= v[0]*x1 + v[2]*x2;
319 	  s2  -= v[1]*x1 + v[3]*x2;
320 	  v   += 4;
321 	}
322 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
323 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
324         idiag   -= 4;
325         i2      -= 2;
326       }
327       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
328     }
329   } else {
330     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
331   }
332   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
333   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
340 {
341   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
342   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
343   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
344   const PetscScalar  *b;
345   PetscErrorCode     ierr;
346   PetscInt           m = a->mbs,i,i2,nz,idx;
347   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
348 
349   PetscFunctionBegin;
350   its = its*lits;
351   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
352   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
353   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
354   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
355   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
356   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
357 
358   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
359 
360   diag  = a->diag;
361   idiag = a->idiag;
362   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
363   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
364 
365   if (flag & SOR_ZERO_INITIAL_GUESS) {
366     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
367       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
368       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
369       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
370       i2     = 3;
371       idiag += 9;
372       for (i=1; i<m; i++) {
373         v     = aa + 9*ai[i];
374         vi    = aj + ai[i];
375         nz    = diag[i] - ai[i];
376         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
377         while (nz--) {
378           idx  = 3*(*vi++);
379           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
380           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
381           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
382           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
383           v   += 9;
384         }
385         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
386         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
387         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
388         idiag   += 9;
389         i2      += 3;
390       }
391       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
392       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
393     }
394     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
395         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
396       i2    = 0;
397       mdiag = a->idiag+9*a->mbs;
398       for (i=0; i<m; i++) {
399         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
400         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
401         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
402         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
403         mdiag  += 9;
404         i2     += 3;
405       }
406       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
407     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
408       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
409     }
410     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
411       idiag   = a->idiag+9*a->mbs - 9;
412       i2      = 3*m - 3;
413       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
414       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
415       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
416       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
417       idiag -= 9;
418       i2    -= 3;
419       for (i=m-2; i>=0; i--) {
420         v     = aa + 9*(diag[i]+1);
421         vi    = aj + diag[i] + 1;
422         nz    = ai[i+1] - diag[i] - 1;
423         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
424         while (nz--) {
425           idx  = 3*(*vi++);
426           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
427           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
428           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
429           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
430           v   += 9;
431         }
432         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
433         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
434         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
435         idiag   -= 9;
436         i2      -= 3;
437       }
438       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
439     }
440   } else {
441     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
442   }
443   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
451 {
452   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
453   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
454   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
455   const PetscScalar  *b;
456   PetscErrorCode     ierr;
457   PetscInt           m = a->mbs,i,i2,nz,idx;
458   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
459 
460   PetscFunctionBegin;
461   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
462   its = its*lits;
463   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
464   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
465   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
466   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
467   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
468 
469   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
470 
471   diag  = a->diag;
472   idiag = a->idiag;
473   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
474   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
475 
476   if (flag & SOR_ZERO_INITIAL_GUESS) {
477     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
478       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
479       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
480       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
481       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
482       i2     = 4;
483       idiag += 16;
484       for (i=1; i<m; i++) {
485 	v     = aa + 16*ai[i];
486 	vi    = aj + ai[i];
487 	nz    = diag[i] - ai[i];
488 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
489 	while (nz--) {
490 	  idx  = 4*(*vi++);
491 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
492 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
493 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
494 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
495 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
496 	  v   += 16;
497 	}
498 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
499 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
500 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
501 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
502         idiag   += 16;
503         i2      += 4;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+16*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
515         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
516         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
517         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
518         mdiag  += 16;
519         i2     += 4;
520       }
521       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
522     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
523       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
524     }
525     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
526       idiag   = a->idiag+16*a->mbs - 16;
527       i2      = 4*m - 4;
528       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
529       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
530       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
531       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
532       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
533       idiag -= 16;
534       i2    -= 4;
535       for (i=m-2; i>=0; i--) {
536 	v     = aa + 16*(diag[i]+1);
537 	vi    = aj + diag[i] + 1;
538 	nz    = ai[i+1] - diag[i] - 1;
539 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
540 	while (nz--) {
541 	  idx  = 4*(*vi++);
542 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
543 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
544 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
545 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
546 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
547 	  v   += 16;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
550 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
551 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
552 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
553         idiag   -= 16;
554         i2      -= 4;
555       }
556       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
569 {
570   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
571   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
572   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
573   const PetscScalar  *b;
574   PetscErrorCode     ierr;
575   PetscInt           m = a->mbs,i,i2,nz,idx;
576   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
577 
578   PetscFunctionBegin;
579   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
580   its = its*lits;
581   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
582   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
583   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
584   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
585   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
586 
587   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
588 
589   diag  = a->diag;
590   idiag = a->idiag;
591   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
592   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
593 
594   if (flag & SOR_ZERO_INITIAL_GUESS) {
595     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
596       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
597       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
598       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
599       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
600       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
601       i2     = 5;
602       idiag += 25;
603       for (i=1; i<m; i++) {
604 	v     = aa + 25*ai[i];
605 	vi    = aj + ai[i];
606 	nz    = diag[i] - ai[i];
607 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
608 	while (nz--) {
609 	  idx  = 5*(*vi++);
610 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
611 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
612 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
613 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
614 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
615 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
616 	  v   += 25;
617 	}
618 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
619 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
620 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
621 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
622 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
623         idiag   += 25;
624         i2      += 5;
625       }
626       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
627       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
628     }
629     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
630         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
631       i2    = 0;
632       mdiag = a->idiag+25*a->mbs;
633       for (i=0; i<m; i++) {
634         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
635         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
636         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
637         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
638         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
639         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
640         mdiag  += 25;
641         i2     += 5;
642       }
643       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
644     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
645       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
646     }
647     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
648       idiag   = a->idiag+25*a->mbs - 25;
649       i2      = 5*m - 5;
650       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
651       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
652       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
653       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
654       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
655       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
656       idiag -= 25;
657       i2    -= 5;
658       for (i=m-2; i>=0; i--) {
659 	v     = aa + 25*(diag[i]+1);
660 	vi    = aj + diag[i] + 1;
661 	nz    = ai[i+1] - diag[i] - 1;
662 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
663 	while (nz--) {
664 	  idx  = 5*(*vi++);
665 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
666 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
667 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
668 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
669 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
670 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
671 	  v   += 25;
672 	}
673 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
674 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
675 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
676 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
677 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
678         idiag   -= 25;
679         i2      -= 5;
680       }
681       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
682     }
683   } else {
684     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
685   }
686   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
694 {
695   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
696   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
697   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
698   const PetscScalar  *b;
699   PetscErrorCode     ierr;
700   PetscInt           m = a->mbs,i,i2,nz,idx;
701   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
702 
703   PetscFunctionBegin;
704   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
705   its = its*lits;
706   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
707   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
708   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
709   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
710   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
711 
712   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
713 
714   diag  = a->diag;
715   idiag = a->idiag;
716   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
717   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
718 
719   if (flag & SOR_ZERO_INITIAL_GUESS) {
720     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
721       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
722       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
723       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
724       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
725       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
726       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
727       i2     = 6;
728       idiag += 36;
729       for (i=1; i<m; i++) {
730         v     = aa + 36*ai[i];
731         vi    = aj + ai[i];
732         nz    = diag[i] - ai[i];
733         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
734         while (nz--) {
735           idx  = 6*(*vi++);
736           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
737           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
738           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
739           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
740           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
741           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
742           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
743           v   += 36;
744         }
745         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
746         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
747         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
748         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
749         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
750         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
751         idiag   += 36;
752         i2      += 6;
753       }
754       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
755       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
756     }
757     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
758         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
759       i2    = 0;
760       mdiag = a->idiag+36*a->mbs;
761       for (i=0; i<m; i++) {
762         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
763         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
764         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
765         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
766         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
767         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
768         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
769         mdiag  += 36;
770         i2     += 6;
771       }
772       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
773     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
774       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
775     }
776     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
777       idiag   = a->idiag+36*a->mbs - 36;
778       i2      = 6*m - 6;
779       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
780       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
781       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
782       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
783       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
784       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
785       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
786       idiag -= 36;
787       i2    -= 6;
788       for (i=m-2; i>=0; i--) {
789         v     = aa + 36*(diag[i]+1);
790         vi    = aj + diag[i] + 1;
791         nz    = ai[i+1] - diag[i] - 1;
792         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
793         while (nz--) {
794           idx  = 6*(*vi++);
795           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
796           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
797           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
798           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
799           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
800           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
801           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
802           v   += 36;
803         }
804         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
805         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
806         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
807         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
808         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
809         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
810         idiag   -= 36;
811         i2      -= 6;
812       }
813       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
814     }
815   } else {
816     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
817   }
818   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
819   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
826 {
827   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
828   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
829   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
830   const PetscScalar  *b;
831   PetscErrorCode     ierr;
832   PetscInt           m = a->mbs,i,i2,nz,idx;
833   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
834 
835   PetscFunctionBegin;
836   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
837   its = its*lits;
838   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
839   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
840   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
841   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
842   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
843 
844   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
845 
846   diag  = a->diag;
847   idiag = a->idiag;
848   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
849   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
850 
851   if (flag & SOR_ZERO_INITIAL_GUESS) {
852     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
853       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
854       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
855       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
856       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
857       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
858       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
859       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
860       i2     = 7;
861       idiag += 49;
862       for (i=1; i<m; i++) {
863         v     = aa + 49*ai[i];
864         vi    = aj + ai[i];
865         nz    = diag[i] - ai[i];
866         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
867         while (nz--) {
868           idx  = 7*(*vi++);
869           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
870           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
871           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
872           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
873           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
874           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
875           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
876           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
877           v   += 49;
878         }
879         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
880         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
881         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
882         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
883         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
884         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
885         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
886         idiag   += 49;
887         i2      += 7;
888       }
889       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
890       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
891     }
892     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
893         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
894       i2    = 0;
895       mdiag = a->idiag+49*a->mbs;
896       for (i=0; i<m; i++) {
897         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
898         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
899         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
900         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
901         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
902         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
903         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
904         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
905         mdiag  += 36;
906         i2     += 6;
907       }
908       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
909     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
910       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
911     }
912     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
913       idiag   = a->idiag+49*a->mbs - 49;
914       i2      = 7*m - 7;
915       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
916       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
917       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
918       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
919       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
920       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
921       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
922       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
923       idiag -= 49;
924       i2    -= 7;
925       for (i=m-2; i>=0; i--) {
926         v     = aa + 49*(diag[i]+1);
927         vi    = aj + diag[i] + 1;
928         nz    = ai[i+1] - diag[i] - 1;
929         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
930         while (nz--) {
931           idx  = 7*(*vi++);
932           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
933           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
934           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
935           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
936           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
937           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
938           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
939           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
940           v   += 49;
941         }
942         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
943         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
944         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
945         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
946         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
947         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
948         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
949         idiag   -= 49;
950         i2      -= 7;
951       }
952       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
953     }
954   } else {
955     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
956   }
957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /*
963     Special version for direct calls from Fortran (Used in PETSc-fun3d)
964 */
965 #if defined(PETSC_HAVE_FORTRAN_CAPS)
966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
968 #define matsetvaluesblocked4_ matsetvaluesblocked4
969 #endif
970 
971 EXTERN_C_BEGIN
972 #undef __FUNCT__
973 #define __FUNCT__ "matsetvaluesblocked4_"
974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
975 {
976   Mat               A = *AA;
977   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
979   PetscInt          *ai=a->i,*ailen=a->ilen;
980   PetscInt          *aj=a->j,stepval,lastcol = -1;
981   const PetscScalar *value = v;
982   MatScalar         *ap,*aa = a->a,*bap;
983 
984   PetscFunctionBegin;
985   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
986   stepval = (n-1)*4;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k];
989     rp   = aj + ai[row];
990     ap   = aa + 16*ai[row];
991     nrow = ailen[row];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col = in[l];
996       if (col <= lastcol) low = 0; else high = nrow;
997       lastcol = col;
998       value = v + k*(stepval+4 + l)*4;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > col) high = t;
1002         else             low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > col) break;
1006         if (rp[i] == col) {
1007           bap  = ap +  16*i;
1008           for (ii=0; ii<4; ii++,value+=stepval) {
1009             for (jj=ii; jj<16; jj+=4) {
1010               bap[jj] += *value++;
1011             }
1012           }
1013           goto noinsert2;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column index thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       for (ii=N; ii>=i; ii--) {
1020         rp[ii+1] = rp[ii];
1021         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1022       }
1023       if (N >= i) {
1024         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1025       }
1026       rp[i] = col;
1027       bap   = ap +  16*i;
1028       for (ii=0; ii<4; ii++,value+=stepval) {
1029         for (jj=ii; jj<16; jj+=4) {
1030           bap[jj] = *value++;
1031         }
1032       }
1033       noinsert2:;
1034       low = i;
1035     }
1036     ailen[row] = nrow;
1037   }
1038   PetscFunctionReturnVoid();
1039 }
1040 EXTERN_C_END
1041 
1042 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1043 #define matsetvalues4_ MATSETVALUES4
1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1045 #define matsetvalues4_ matsetvalues4
1046 #endif
1047 
1048 EXTERN_C_BEGIN
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatSetValues4_"
1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1052 {
1053   Mat         A = *AA;
1054   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1055   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1056   PetscInt    *ai=a->i,*ailen=a->ilen;
1057   PetscInt    *aj=a->j,brow,bcol;
1058   PetscInt    ridx,cidx,lastcol = -1;
1059   MatScalar   *ap,value,*aa=a->a,*bap;
1060 
1061   PetscFunctionBegin;
1062   for (k=0; k<m; k++) { /* loop over added rows */
1063     row  = im[k]; brow = row/4;
1064     rp   = aj + ai[brow];
1065     ap   = aa + 16*ai[brow];
1066     nrow = ailen[brow];
1067     low  = 0;
1068     high = nrow;
1069     for (l=0; l<n; l++) { /* loop over added columns */
1070       col = in[l]; bcol = col/4;
1071       ridx = row % 4; cidx = col % 4;
1072       value = v[l + k*n];
1073       if (col <= lastcol) low = 0; else high = nrow;
1074       lastcol = col;
1075       while (high-low > 7) {
1076         t = (low+high)/2;
1077         if (rp[t] > bcol) high = t;
1078         else              low  = t;
1079       }
1080       for (i=low; i<high; i++) {
1081         if (rp[i] > bcol) break;
1082         if (rp[i] == bcol) {
1083           bap  = ap +  16*i + 4*cidx + ridx;
1084           *bap += value;
1085           goto noinsert1;
1086         }
1087       }
1088       N = nrow++ - 1;
1089       high++; /* added new column thus must search to one higher than before */
1090       /* shift up all the later entries in this row */
1091       for (ii=N; ii>=i; ii--) {
1092         rp[ii+1] = rp[ii];
1093         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1094       }
1095       if (N>=i) {
1096         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1097       }
1098       rp[i]                    = bcol;
1099       ap[16*i + 4*cidx + ridx] = value;
1100       noinsert1:;
1101       low = i;
1102     }
1103     ailen[brow] = nrow;
1104   }
1105   PetscFunctionReturnVoid();
1106 }
1107 EXTERN_C_END
1108 
1109 /*
1110      Checks for missing diagonals
1111 */
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1115 {
1116   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118   PetscInt       *diag,*jj = a->j,i;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1122   *missing = PETSC_FALSE;
1123   if (A->rmap->n > 0 && !jj) {
1124     *missing  = PETSC_TRUE;
1125     if (d) *d = 0;
1126     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1127   } else {
1128     diag     = a->diag;
1129     for (i=0; i<a->mbs; i++) {
1130       if (jj[diag[i]] != i) {
1131         *missing  = PETSC_TRUE;
1132         if (d) *d = i;
1133         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1134       }
1135     }
1136   }
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1143 {
1144   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscInt       i,j,m = a->mbs;
1147 
1148   PetscFunctionBegin;
1149   if (!a->diag) {
1150     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1151   }
1152   for (i=0; i<m; i++) {
1153     a->diag[i] = a->i[i+1];
1154     for (j=a->i[i]; j<a->i[i+1]; j++) {
1155       if (a->j[j] == i) {
1156         a->diag[i] = j;
1157         break;
1158       }
1159     }
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 
1165 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1169 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1170 {
1171   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1172   PetscErrorCode ierr;
1173   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1174   PetscInt       *tia, *tja;
1175 
1176   PetscFunctionBegin;
1177   *nn = n;
1178   if (!ia) PetscFunctionReturn(0);
1179   if (symmetric) {
1180     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1181   } else {
1182     tia = a->i; tja = a->j;
1183   }
1184 
1185   if (!blockcompressed && bs > 1) {
1186     (*nn) *= bs;
1187     nbs    = bs;
1188     /* malloc & create the natural set of indices */
1189     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1190     if (n) {
1191       (*ia)[0] = 0;
1192       for (j=1; j<bs; j++) {
1193         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1194       }
1195     }
1196 
1197     for (i=1; i<n; i++) {
1198       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1199       for (j=1; j<bs; j++) {
1200         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1201       }
1202     }
1203     if (n) {
1204       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1205     }
1206 
1207     if (ja) {
1208       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1209       cnt = 0;
1210       for (i=0; i<n; i++) {
1211         for (j=0; j<bs; j++) {
1212           for (k=tia[i]; k<tia[i+1]; k++) {
1213             for (l=0; l<bs; l++) {
1214               (*ja)[cnt++] = bs*tja[k] + l;
1215 	    }
1216           }
1217         }
1218       }
1219     }
1220 
1221     n     *= bs;
1222     nz *= bs*bs;
1223     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1224       ierr = PetscFree(tia);CHKERRQ(ierr);
1225       ierr = PetscFree(tja);CHKERRQ(ierr);
1226     }
1227   } else {
1228     *ia = tia;
1229     if (ja) *ja = tja;
1230   }
1231   if (oshift == 1) {
1232     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1233     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1240 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1241 {
1242   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1243   PetscErrorCode ierr;
1244   PetscInt       i,n = a->mbs,nz = a->i[n];
1245 
1246   PetscFunctionBegin;
1247   if (!ia) PetscFunctionReturn(0);
1248   if (!blockcompressed && A->rmap->bs > 1) {
1249     ierr = PetscFree(*ia);CHKERRQ(ierr);
1250     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1251   } else if (symmetric) {
1252     ierr = PetscFree(*ia);CHKERRQ(ierr);
1253     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1254   } else if (oshift == 1) { /* blockcompressed */
1255     for (i=0; i<n+1; i++) a->i[i]--;
1256     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1263 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1264 {
1265   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1266   PetscErrorCode ierr;
1267 
1268   PetscFunctionBegin;
1269 #if defined(PETSC_USE_LOG)
1270   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1271 #endif
1272   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1273   if (a->row) {
1274     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1275   }
1276   if (a->col) {
1277     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1278   }
1279   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1280   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1281   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1282   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1283   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1284   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1285   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1286   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1287   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1288 
1289   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1290   ierr = PetscFree(a);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1306 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1307 {
1308   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   switch (op) {
1313   case MAT_ROW_ORIENTED:
1314     a->roworiented    = flg;
1315     break;
1316   case MAT_KEEP_NONZERO_PATTERN:
1317     a->keepnonzeropattern = flg;
1318     break;
1319   case MAT_NEW_NONZERO_LOCATIONS:
1320     a->nonew          = (flg ? 0 : 1);
1321     break;
1322   case MAT_NEW_NONZERO_LOCATION_ERR:
1323     a->nonew          = (flg ? -1 : 0);
1324     break;
1325   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1326     a->nonew          = (flg ? -2 : 0);
1327     break;
1328   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1329     a->nounused       = (flg ? -1 : 0);
1330     break;
1331   case MAT_NEW_DIAGONALS:
1332   case MAT_IGNORE_OFF_PROC_ENTRIES:
1333   case MAT_USE_HASH_TABLE:
1334     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1335     break;
1336   case MAT_SYMMETRIC:
1337   case MAT_STRUCTURALLY_SYMMETRIC:
1338   case MAT_HERMITIAN:
1339   case MAT_SYMMETRY_ETERNAL:
1340     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1341     break;
1342   default:
1343     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1350 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1351 {
1352   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1353   PetscErrorCode ierr;
1354   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1355   MatScalar      *aa,*aa_i;
1356   PetscScalar    *v_i;
1357 
1358   PetscFunctionBegin;
1359   bs  = A->rmap->bs;
1360   ai  = a->i;
1361   aj  = a->j;
1362   aa  = a->a;
1363   bs2 = a->bs2;
1364 
1365   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1366 
1367   bn  = row/bs;   /* Block number */
1368   bp  = row % bs; /* Block Position */
1369   M   = ai[bn+1] - ai[bn];
1370   *nz = bs*M;
1371 
1372   if (v) {
1373     *v = 0;
1374     if (*nz) {
1375       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1376       for (i=0; i<M; i++) { /* for each block in the block row */
1377         v_i  = *v + i*bs;
1378         aa_i = aa + bs2*(ai[bn] + i);
1379         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1380       }
1381     }
1382   }
1383 
1384   if (idx) {
1385     *idx = 0;
1386     if (*nz) {
1387       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1388       for (i=0; i<M; i++) { /* for each block in the block row */
1389         idx_i = *idx + i*bs;
1390         itmp  = bs*aj[ai[bn] + i];
1391         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1392       }
1393     }
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1400 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1401 {
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1406   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1412 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1413 {
1414   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1415   Mat            C;
1416   PetscErrorCode ierr;
1417   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1418   PetscInt       *rows,*cols,bs2=a->bs2;
1419   MatScalar      *array;
1420 
1421   PetscFunctionBegin;
1422   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1423   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1424     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1425     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1426 
1427     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1428     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1429     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1430     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1431     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1432     ierr = PetscFree(col);CHKERRQ(ierr);
1433   } else {
1434     C = *B;
1435   }
1436 
1437   array = a->a;
1438   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1439   cols = rows + bs;
1440   for (i=0; i<mbs; i++) {
1441     cols[0] = i*bs;
1442     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1443     len = ai[i+1] - ai[i];
1444     for (j=0; j<len; j++) {
1445       rows[0] = (*aj++)*bs;
1446       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1447       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1448       array += bs2;
1449     }
1450   }
1451   ierr = PetscFree(rows);CHKERRQ(ierr);
1452 
1453   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455 
1456   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1457     *B = C;
1458   } else {
1459     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1460   }
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1466 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1467 {
1468   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1469   PetscErrorCode ierr;
1470   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1471   int            fd;
1472   PetscScalar    *aa;
1473   FILE           *file;
1474 
1475   PetscFunctionBegin;
1476   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1477   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1478   col_lens[0] = MAT_FILE_COOKIE;
1479 
1480   col_lens[1] = A->rmap->N;
1481   col_lens[2] = A->cmap->n;
1482   col_lens[3] = a->nz*bs2;
1483 
1484   /* store lengths of each row and write (including header) to file */
1485   count = 0;
1486   for (i=0; i<a->mbs; i++) {
1487     for (j=0; j<bs; j++) {
1488       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1489     }
1490   }
1491   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1492   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1493 
1494   /* store column indices (zero start index) */
1495   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1496   count = 0;
1497   for (i=0; i<a->mbs; i++) {
1498     for (j=0; j<bs; j++) {
1499       for (k=a->i[i]; k<a->i[i+1]; k++) {
1500         for (l=0; l<bs; l++) {
1501           jj[count++] = bs*a->j[k] + l;
1502         }
1503       }
1504     }
1505   }
1506   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1507   ierr = PetscFree(jj);CHKERRQ(ierr);
1508 
1509   /* store nonzero values */
1510   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1511   count = 0;
1512   for (i=0; i<a->mbs; i++) {
1513     for (j=0; j<bs; j++) {
1514       for (k=a->i[i]; k<a->i[i+1]; k++) {
1515         for (l=0; l<bs; l++) {
1516           aa[count++] = a->a[bs2*k + l*bs + j];
1517         }
1518       }
1519     }
1520   }
1521   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1522   ierr = PetscFree(aa);CHKERRQ(ierr);
1523 
1524   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1525   if (file) {
1526     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1527   }
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1533 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1534 {
1535   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1536   PetscErrorCode    ierr;
1537   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1538   PetscViewerFormat format;
1539 
1540   PetscFunctionBegin;
1541   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1542   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1543     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1544   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1545     Mat aij;
1546     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1547     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1548     ierr = MatDestroy(aij);CHKERRQ(ierr);
1549   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1550      PetscFunctionReturn(0);
1551   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1552     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1553     for (i=0; i<a->mbs; i++) {
1554       for (j=0; j<bs; j++) {
1555         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1556         for (k=a->i[i]; k<a->i[i+1]; k++) {
1557           for (l=0; l<bs; l++) {
1558 #if defined(PETSC_USE_COMPLEX)
1559             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1560               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1561                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1562             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1564                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1567             }
1568 #else
1569             if (a->a[bs2*k + l*bs + j] != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1571             }
1572 #endif
1573           }
1574         }
1575         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1576       }
1577     }
1578     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1579   } else {
1580     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1581     for (i=0; i<a->mbs; i++) {
1582       for (j=0; j<bs; j++) {
1583         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1584         for (k=a->i[i]; k<a->i[i+1]; k++) {
1585           for (l=0; l<bs; l++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1588               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1589                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1590             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1591               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1592                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1593             } else {
1594               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1595             }
1596 #else
1597             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1598 #endif
1599           }
1600         }
1601         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1602       }
1603     }
1604     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1605   }
1606   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1612 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1613 {
1614   Mat            A = (Mat) Aa;
1615   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1616   PetscErrorCode ierr;
1617   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1618   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1619   MatScalar      *aa;
1620   PetscViewer    viewer;
1621 
1622   PetscFunctionBegin;
1623 
1624   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1625   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1626 
1627   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1628 
1629   /* loop over matrix elements drawing boxes */
1630   color = PETSC_DRAW_BLUE;
1631   for (i=0,row=0; i<mbs; i++,row+=bs) {
1632     for (j=a->i[i]; j<a->i[i+1]; j++) {
1633       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635       aa = a->a + j*bs2;
1636       for (k=0; k<bs; k++) {
1637         for (l=0; l<bs; l++) {
1638           if (PetscRealPart(*aa++) >=  0.) continue;
1639           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1640         }
1641       }
1642     }
1643   }
1644   color = PETSC_DRAW_CYAN;
1645   for (i=0,row=0; i<mbs; i++,row+=bs) {
1646     for (j=a->i[i]; j<a->i[i+1]; j++) {
1647       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1648       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1649       aa = a->a + j*bs2;
1650       for (k=0; k<bs; k++) {
1651         for (l=0; l<bs; l++) {
1652           if (PetscRealPart(*aa++) != 0.) continue;
1653           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1654         }
1655       }
1656     }
1657   }
1658 
1659   color = PETSC_DRAW_RED;
1660   for (i=0,row=0; i<mbs; i++,row+=bs) {
1661     for (j=a->i[i]; j<a->i[i+1]; j++) {
1662       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1663       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1664       aa = a->a + j*bs2;
1665       for (k=0; k<bs; k++) {
1666         for (l=0; l<bs; l++) {
1667           if (PetscRealPart(*aa++) <= 0.) continue;
1668           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1669         }
1670       }
1671     }
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1679 {
1680   PetscErrorCode ierr;
1681   PetscReal      xl,yl,xr,yr,w,h;
1682   PetscDraw      draw;
1683   PetscTruth     isnull;
1684 
1685   PetscFunctionBegin;
1686 
1687   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1688   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1689 
1690   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1691   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1692   xr += w;    yr += h;  xl = -w;     yl = -h;
1693   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1694   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1695   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatView_SeqBAIJ"
1701 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1702 {
1703   PetscErrorCode ierr;
1704   PetscTruth     iascii,isbinary,isdraw;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1708   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1709   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1710   if (iascii){
1711     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1712   } else if (isbinary) {
1713     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1714   } else if (isdraw) {
1715     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1716   } else {
1717     Mat B;
1718     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1719     ierr = MatView(B,viewer);CHKERRQ(ierr);
1720     ierr = MatDestroy(B);CHKERRQ(ierr);
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1728 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1729 {
1730   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1731   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1732   PetscInt    *ai = a->i,*ailen = a->ilen;
1733   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1734   MatScalar   *ap,*aa = a->a;
1735 
1736   PetscFunctionBegin;
1737   for (k=0; k<m; k++) { /* loop over rows */
1738     row  = im[k]; brow = row/bs;
1739     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1740     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1741     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1742     nrow = ailen[brow];
1743     for (l=0; l<n; l++) { /* loop over columns */
1744       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1745       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1746       col  = in[l] ;
1747       bcol = col/bs;
1748       cidx = col%bs;
1749       ridx = row%bs;
1750       high = nrow;
1751       low  = 0; /* assume unsorted */
1752       while (high-low > 5) {
1753         t = (low+high)/2;
1754         if (rp[t] > bcol) high = t;
1755         else             low  = t;
1756       }
1757       for (i=low; i<high; i++) {
1758         if (rp[i] > bcol) break;
1759         if (rp[i] == bcol) {
1760           *v++ = ap[bs2*i+bs*cidx+ridx];
1761           goto finished;
1762         }
1763       }
1764       *v++ = 0.0;
1765       finished:;
1766     }
1767   }
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #define CHUNKSIZE 10
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1774 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1775 {
1776   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1777   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1778   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1779   PetscErrorCode    ierr;
1780   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1781   PetscTruth        roworiented=a->roworiented;
1782   const PetscScalar *value = v;
1783   MatScalar         *ap,*aa = a->a,*bap;
1784 
1785   PetscFunctionBegin;
1786   if (roworiented) {
1787     stepval = (n-1)*bs;
1788   } else {
1789     stepval = (m-1)*bs;
1790   }
1791   for (k=0; k<m; k++) { /* loop over added rows */
1792     row  = im[k];
1793     if (row < 0) continue;
1794 #if defined(PETSC_USE_DEBUG)
1795     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1796 #endif
1797     rp   = aj + ai[row];
1798     ap   = aa + bs2*ai[row];
1799     rmax = imax[row];
1800     nrow = ailen[row];
1801     low  = 0;
1802     high = nrow;
1803     for (l=0; l<n; l++) { /* loop over added columns */
1804       if (in[l] < 0) continue;
1805 #if defined(PETSC_USE_DEBUG)
1806       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1807 #endif
1808       col = in[l];
1809       if (roworiented) {
1810         value = v + k*(stepval+bs)*bs + l*bs;
1811       } else {
1812         value = v + l*(stepval+bs)*bs + k*bs;
1813       }
1814       if (col <= lastcol) low = 0; else high = nrow;
1815       lastcol = col;
1816       while (high-low > 7) {
1817         t = (low+high)/2;
1818         if (rp[t] > col) high = t;
1819         else             low  = t;
1820       }
1821       for (i=low; i<high; i++) {
1822         if (rp[i] > col) break;
1823         if (rp[i] == col) {
1824           bap  = ap +  bs2*i;
1825           if (roworiented) {
1826             if (is == ADD_VALUES) {
1827               for (ii=0; ii<bs; ii++,value+=stepval) {
1828                 for (jj=ii; jj<bs2; jj+=bs) {
1829                   bap[jj] += *value++;
1830                 }
1831               }
1832             } else {
1833               for (ii=0; ii<bs; ii++,value+=stepval) {
1834                 for (jj=ii; jj<bs2; jj+=bs) {
1835                   bap[jj] = *value++;
1836                 }
1837               }
1838             }
1839           } else {
1840             if (is == ADD_VALUES) {
1841               for (ii=0; ii<bs; ii++,value+=stepval) {
1842                 for (jj=0; jj<bs; jj++) {
1843                   *bap++ += *value++;
1844                 }
1845               }
1846             } else {
1847               for (ii=0; ii<bs; ii++,value+=stepval) {
1848                 for (jj=0; jj<bs; jj++) {
1849                   *bap++  = *value++;
1850                 }
1851               }
1852             }
1853           }
1854           goto noinsert2;
1855         }
1856       }
1857       if (nonew == 1) goto noinsert2;
1858       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1859       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1860       N = nrow++ - 1; high++;
1861       /* shift up all the later entries in this row */
1862       for (ii=N; ii>=i; ii--) {
1863         rp[ii+1] = rp[ii];
1864         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1865       }
1866       if (N >= i) {
1867         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1868       }
1869       rp[i] = col;
1870       bap   = ap +  bs2*i;
1871       if (roworiented) {
1872         for (ii=0; ii<bs; ii++,value+=stepval) {
1873           for (jj=ii; jj<bs2; jj+=bs) {
1874             bap[jj] = *value++;
1875           }
1876         }
1877       } else {
1878         for (ii=0; ii<bs; ii++,value+=stepval) {
1879           for (jj=0; jj<bs; jj++) {
1880             *bap++  = *value++;
1881           }
1882         }
1883       }
1884       noinsert2:;
1885       low = i;
1886     }
1887     ailen[row] = nrow;
1888   }
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1895 {
1896   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1897   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1898   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1899   PetscErrorCode ierr;
1900   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1901   MatScalar      *aa = a->a,*ap;
1902   PetscReal      ratio=0.6;
1903 
1904   PetscFunctionBegin;
1905   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1906 
1907   if (m) rmax = ailen[0];
1908   for (i=1; i<mbs; i++) {
1909     /* move each row back by the amount of empty slots (fshift) before it*/
1910     fshift += imax[i-1] - ailen[i-1];
1911     rmax   = PetscMax(rmax,ailen[i]);
1912     if (fshift) {
1913       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1914       N = ailen[i];
1915       for (j=0; j<N; j++) {
1916         ip[j-fshift] = ip[j];
1917         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1918       }
1919     }
1920     ai[i] = ai[i-1] + ailen[i-1];
1921   }
1922   if (mbs) {
1923     fshift += imax[mbs-1] - ailen[mbs-1];
1924     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1925   }
1926   /* reset ilen and imax for each row */
1927   for (i=0; i<mbs; i++) {
1928     ailen[i] = imax[i] = ai[i+1] - ai[i];
1929   }
1930   a->nz = ai[mbs];
1931 
1932   /* diagonals may have moved, so kill the diagonal pointers */
1933   a->idiagvalid = PETSC_FALSE;
1934   if (fshift && a->diag) {
1935     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1936     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1937     a->diag = 0;
1938   }
1939   if (fshift && a->nounused == -1) {
1940     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1941   }
1942   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1943   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1944   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1945   a->reallocs          = 0;
1946   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1947 
1948   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1949   if (a->compressedrow.use){
1950     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1951   }
1952 
1953   A->same_nonzero = PETSC_TRUE;
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 /*
1958    This function returns an array of flags which indicate the locations of contiguous
1959    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1960    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1961    Assume: sizes should be long enough to hold all the values.
1962 */
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1965 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1966 {
1967   PetscInt   i,j,k,row;
1968   PetscTruth flg;
1969 
1970   PetscFunctionBegin;
1971   for (i=0,j=0; i<n; j++) {
1972     row = idx[i];
1973     if (row%bs!=0) { /* Not the begining of a block */
1974       sizes[j] = 1;
1975       i++;
1976     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1977       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1978       i++;
1979     } else { /* Begining of the block, so check if the complete block exists */
1980       flg = PETSC_TRUE;
1981       for (k=1; k<bs; k++) {
1982         if (row+k != idx[i+k]) { /* break in the block */
1983           flg = PETSC_FALSE;
1984           break;
1985         }
1986       }
1987       if (flg) { /* No break in the bs */
1988         sizes[j] = bs;
1989         i+= bs;
1990       } else {
1991         sizes[j] = 1;
1992         i++;
1993       }
1994     }
1995   }
1996   *bs_max = j;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2002 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2003 {
2004   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2005   PetscErrorCode ierr;
2006   PetscInt       i,j,k,count,*rows;
2007   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2008   PetscScalar    zero = 0.0;
2009   MatScalar      *aa;
2010 
2011   PetscFunctionBegin;
2012   /* Make a copy of the IS and  sort it */
2013   /* allocate memory for rows,sizes */
2014   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2015   sizes = rows + is_n;
2016 
2017   /* copy IS values to rows, and sort them */
2018   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2019   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2020   if (baij->keepnonzeropattern) {
2021     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2022     bs_max = is_n;
2023     A->same_nonzero = PETSC_TRUE;
2024   } else {
2025     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2026     A->same_nonzero = PETSC_FALSE;
2027   }
2028 
2029   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2030     row   = rows[j];
2031     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2032     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2033     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2034     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2035       if (diag != 0.0) {
2036         if (baij->ilen[row/bs] > 0) {
2037           baij->ilen[row/bs]       = 1;
2038           baij->j[baij->i[row/bs]] = row/bs;
2039           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2040         }
2041         /* Now insert all the diagonal values for this bs */
2042         for (k=0; k<bs; k++) {
2043           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2044         }
2045       } else { /* (diag == 0.0) */
2046         baij->ilen[row/bs] = 0;
2047       } /* end (diag == 0.0) */
2048     } else { /* (sizes[i] != bs) */
2049 #if defined (PETSC_USE_DEBUG)
2050       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2051 #endif
2052       for (k=0; k<count; k++) {
2053         aa[0] =  zero;
2054         aa    += bs;
2055       }
2056       if (diag != 0.0) {
2057         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2058       }
2059     }
2060   }
2061 
2062   ierr = PetscFree(rows);CHKERRQ(ierr);
2063   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2069 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2070 {
2071   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2072   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2073   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2074   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2075   PetscErrorCode ierr;
2076   PetscInt       ridx,cidx,bs2=a->bs2;
2077   PetscTruth     roworiented=a->roworiented;
2078   MatScalar      *ap,value,*aa=a->a,*bap;
2079 
2080   PetscFunctionBegin;
2081   for (k=0; k<m; k++) { /* loop over added rows */
2082     row  = im[k];
2083     brow = row/bs;
2084     if (row < 0) continue;
2085 #if defined(PETSC_USE_DEBUG)
2086     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2087 #endif
2088     rp   = aj + ai[brow];
2089     ap   = aa + bs2*ai[brow];
2090     rmax = imax[brow];
2091     nrow = ailen[brow];
2092     low  = 0;
2093     high = nrow;
2094     for (l=0; l<n; l++) { /* loop over added columns */
2095       if (in[l] < 0) continue;
2096 #if defined(PETSC_USE_DEBUG)
2097       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2098 #endif
2099       col = in[l]; bcol = col/bs;
2100       ridx = row % bs; cidx = col % bs;
2101       if (roworiented) {
2102         value = v[l + k*n];
2103       } else {
2104         value = v[k + l*m];
2105       }
2106       if (col <= lastcol) low = 0; else high = nrow;
2107       lastcol = col;
2108       while (high-low > 7) {
2109         t = (low+high)/2;
2110         if (rp[t] > bcol) high = t;
2111         else              low  = t;
2112       }
2113       for (i=low; i<high; i++) {
2114         if (rp[i] > bcol) break;
2115         if (rp[i] == bcol) {
2116           bap  = ap +  bs2*i + bs*cidx + ridx;
2117           if (is == ADD_VALUES) *bap += value;
2118           else                  *bap  = value;
2119           goto noinsert1;
2120         }
2121       }
2122       if (nonew == 1) goto noinsert1;
2123       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2124       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2125       N = nrow++ - 1; high++;
2126       /* shift up all the later entries in this row */
2127       for (ii=N; ii>=i; ii--) {
2128         rp[ii+1] = rp[ii];
2129         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2130       }
2131       if (N>=i) {
2132         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2133       }
2134       rp[i]                      = bcol;
2135       ap[bs2*i + bs*cidx + ridx] = value;
2136       a->nz++;
2137       noinsert1:;
2138       low = i;
2139     }
2140     ailen[brow] = nrow;
2141   }
2142   A->same_nonzero = PETSC_FALSE;
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2150 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2151 {
2152   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2153   Mat            outA;
2154   PetscErrorCode ierr;
2155   PetscTruth     row_identity,col_identity;
2156 
2157   PetscFunctionBegin;
2158   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2159   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2160   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2161   if (!row_identity || !col_identity) {
2162     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2163   }
2164 
2165   outA          = inA;
2166   inA->factor   = MAT_FACTOR_LU;
2167 
2168   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2169 
2170   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2171   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2172   a->row = row;
2173   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2174   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2175   a->col = col;
2176 
2177   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2178   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2179   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2180 
2181   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2182   if (!a->solve_work) {
2183     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2184     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2185   }
2186   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2187 
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 EXTERN_C_BEGIN
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2194 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2195 {
2196   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2197   PetscInt    i,nz,mbs;
2198 
2199   PetscFunctionBegin;
2200   nz  = baij->maxnz/baij->bs2;
2201   mbs = baij->mbs;
2202   for (i=0; i<nz; i++) {
2203     baij->j[i] = indices[i];
2204   }
2205   baij->nz = nz;
2206   for (i=0; i<mbs; i++) {
2207     baij->ilen[i] = baij->imax[i];
2208   }
2209   PetscFunctionReturn(0);
2210 }
2211 EXTERN_C_END
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2215 /*@
2216     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2217        in the matrix.
2218 
2219   Input Parameters:
2220 +  mat - the SeqBAIJ matrix
2221 -  indices - the column indices
2222 
2223   Level: advanced
2224 
2225   Notes:
2226     This can be called if you have precomputed the nonzero structure of the
2227   matrix and want to provide it to the matrix object to improve the performance
2228   of the MatSetValues() operation.
2229 
2230     You MUST have set the correct numbers of nonzeros per row in the call to
2231   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2232 
2233     MUST be called before any calls to MatSetValues();
2234 
2235 @*/
2236 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2237 {
2238   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2239 
2240   PetscFunctionBegin;
2241   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2242   PetscValidPointer(indices,2);
2243   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2244   if (f) {
2245     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2246   } else {
2247     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2248   }
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2254 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2255 {
2256   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2257   PetscErrorCode ierr;
2258   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2259   PetscReal      atmp;
2260   PetscScalar    *x,zero = 0.0;
2261   MatScalar      *aa;
2262   PetscInt       ncols,brow,krow,kcol;
2263 
2264   PetscFunctionBegin;
2265   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2266   bs   = A->rmap->bs;
2267   aa   = a->a;
2268   ai   = a->i;
2269   aj   = a->j;
2270   mbs  = a->mbs;
2271 
2272   ierr = VecSet(v,zero);CHKERRQ(ierr);
2273   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2274   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2275   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2276   for (i=0; i<mbs; i++) {
2277     ncols = ai[1] - ai[0]; ai++;
2278     brow  = bs*i;
2279     for (j=0; j<ncols; j++){
2280       for (kcol=0; kcol<bs; kcol++){
2281         for (krow=0; krow<bs; krow++){
2282           atmp = PetscAbsScalar(*aa);aa++;
2283           row = brow + krow;    /* row index */
2284           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2285           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2286         }
2287       }
2288       aj++;
2289     }
2290   }
2291   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatCopy_SeqBAIJ"
2297 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2298 {
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   /* If the two matrices have the same copy implementation, use fast copy. */
2303   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2304     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2305     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2306 
2307     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2308       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2309     }
2310     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2311   } else {
2312     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2313   }
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2319 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2320 {
2321   PetscErrorCode ierr;
2322 
2323   PetscFunctionBegin;
2324   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2330 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2331 {
2332   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2333   PetscFunctionBegin;
2334   *array = a->a;
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2340 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2341 {
2342   PetscFunctionBegin;
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #include "petscblaslapack.h"
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2349 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2350 {
2351   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2352   PetscErrorCode ierr;
2353   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2354   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2355 
2356   PetscFunctionBegin;
2357   if (str == SAME_NONZERO_PATTERN) {
2358     PetscScalar alpha = a;
2359     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2360   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2361     if (y->xtoy && y->XtoY != X) {
2362       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2363       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2364     }
2365     if (!y->xtoy) { /* get xtoy */
2366       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2367       y->XtoY = X;
2368       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2369     }
2370     bs2 = bs*bs;
2371     for (i=0; i<x->nz; i++) {
2372       j = 0;
2373       while (j < bs2){
2374         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2375         j++;
2376       }
2377     }
2378     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2379   } else {
2380     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2387 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2388 {
2389   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2390   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2391   MatScalar      *aa = a->a;
2392 
2393   PetscFunctionBegin;
2394   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2400 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2401 {
2402   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2403   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2404   MatScalar      *aa = a->a;
2405 
2406   PetscFunctionBegin;
2407   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2415 /*
2416     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2417 */
2418 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2419 {
2420   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2421   PetscErrorCode ierr;
2422   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2423   PetscInt       nz = a->i[m],row,*jj,mr,col;
2424 
2425   PetscFunctionBegin;
2426   *nn = n;
2427   if (!ia) PetscFunctionReturn(0);
2428   if (symmetric) {
2429     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2430   } else {
2431     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2432     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2433     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2434     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2435     jj = a->j;
2436     for (i=0; i<nz; i++) {
2437       collengths[jj[i]]++;
2438     }
2439     cia[0] = oshift;
2440     for (i=0; i<n; i++) {
2441       cia[i+1] = cia[i] + collengths[i];
2442     }
2443     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2444     jj   = a->j;
2445     for (row=0; row<m; row++) {
2446       mr = a->i[row+1] - a->i[row];
2447       for (i=0; i<mr; i++) {
2448         col = *jj++;
2449         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2450       }
2451     }
2452     ierr = PetscFree(collengths);CHKERRQ(ierr);
2453     *ia = cia; *ja = cja;
2454   }
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 #undef __FUNCT__
2459 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2460 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2461 {
2462   PetscErrorCode ierr;
2463 
2464   PetscFunctionBegin;
2465   if (!ia) PetscFunctionReturn(0);
2466   ierr = PetscFree(*ia);CHKERRQ(ierr);
2467   ierr = PetscFree(*ja);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 
2472 /* -------------------------------------------------------------------*/
2473 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2474        MatGetRow_SeqBAIJ,
2475        MatRestoreRow_SeqBAIJ,
2476        MatMult_SeqBAIJ_N,
2477 /* 4*/ MatMultAdd_SeqBAIJ_N,
2478        MatMultTranspose_SeqBAIJ,
2479        MatMultTransposeAdd_SeqBAIJ,
2480        0,
2481        0,
2482        0,
2483 /*10*/ 0,
2484        MatLUFactor_SeqBAIJ,
2485        0,
2486        0,
2487        MatTranspose_SeqBAIJ,
2488 /*15*/ MatGetInfo_SeqBAIJ,
2489        MatEqual_SeqBAIJ,
2490        MatGetDiagonal_SeqBAIJ,
2491        MatDiagonalScale_SeqBAIJ,
2492        MatNorm_SeqBAIJ,
2493 /*20*/ 0,
2494        MatAssemblyEnd_SeqBAIJ,
2495        MatSetOption_SeqBAIJ,
2496        MatZeroEntries_SeqBAIJ,
2497 /*24*/ MatZeroRows_SeqBAIJ,
2498        0,
2499        0,
2500        0,
2501        0,
2502 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2503        0,
2504        0,
2505        MatGetArray_SeqBAIJ,
2506        MatRestoreArray_SeqBAIJ,
2507 /*34*/ MatDuplicate_SeqBAIJ,
2508        0,
2509        0,
2510        MatILUFactor_SeqBAIJ,
2511        0,
2512 /*39*/ MatAXPY_SeqBAIJ,
2513        MatGetSubMatrices_SeqBAIJ,
2514        MatIncreaseOverlap_SeqBAIJ,
2515        MatGetValues_SeqBAIJ,
2516        MatCopy_SeqBAIJ,
2517 /*44*/ 0,
2518        MatScale_SeqBAIJ,
2519        0,
2520        0,
2521        MatILUDTFactor_SeqBAIJ,
2522 /*49*/ 0,
2523        MatGetRowIJ_SeqBAIJ,
2524        MatRestoreRowIJ_SeqBAIJ,
2525        MatGetColumnIJ_SeqBAIJ,
2526        MatRestoreColumnIJ_SeqBAIJ,
2527 /*54*/ MatFDColoringCreate_SeqAIJ,
2528        0,
2529        0,
2530        0,
2531        MatSetValuesBlocked_SeqBAIJ,
2532 /*59*/ MatGetSubMatrix_SeqBAIJ,
2533        MatDestroy_SeqBAIJ,
2534        MatView_SeqBAIJ,
2535        0,
2536        0,
2537 /*64*/ 0,
2538        0,
2539        0,
2540        0,
2541        0,
2542 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2543        0,
2544        MatConvert_Basic,
2545        0,
2546        0,
2547 /*74*/ 0,
2548        0,
2549        0,
2550        0,
2551        0,
2552 /*79*/ 0,
2553        0,
2554        0,
2555        0,
2556        MatLoad_SeqBAIJ,
2557 /*84*/ 0,
2558        0,
2559        0,
2560        0,
2561        0,
2562 /*89*/ 0,
2563        0,
2564        0,
2565        0,
2566        0,
2567 /*94*/ 0,
2568        0,
2569        0,
2570        0,
2571        0,
2572 /*99*/0,
2573        0,
2574        0,
2575        0,
2576        0,
2577 /*104*/0,
2578        MatRealPart_SeqBAIJ,
2579        MatImaginaryPart_SeqBAIJ,
2580        0,
2581        0,
2582 /*109*/0,
2583        0,
2584        0,
2585        0,
2586        MatMissingDiagonal_SeqBAIJ
2587 /*114*/
2588 };
2589 
2590 EXTERN_C_BEGIN
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2593 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2594 {
2595   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2596   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2597   PetscErrorCode ierr;
2598 
2599   PetscFunctionBegin;
2600   if (aij->nonew != 1) {
2601     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2602   }
2603 
2604   /* allocate space for values if not already there */
2605   if (!aij->saved_values) {
2606     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2607   }
2608 
2609   /* copy values over */
2610   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2611   PetscFunctionReturn(0);
2612 }
2613 EXTERN_C_END
2614 
2615 EXTERN_C_BEGIN
2616 #undef __FUNCT__
2617 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2618 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2619 {
2620   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2621   PetscErrorCode ierr;
2622   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2623 
2624   PetscFunctionBegin;
2625   if (aij->nonew != 1) {
2626     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2627   }
2628   if (!aij->saved_values) {
2629     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2630   }
2631 
2632   /* copy values over */
2633   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2634   PetscFunctionReturn(0);
2635 }
2636 EXTERN_C_END
2637 
2638 EXTERN_C_BEGIN
2639 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2640 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2641 EXTERN_C_END
2642 
2643 EXTERN_C_BEGIN
2644 #undef __FUNCT__
2645 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2646 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2647 {
2648   Mat_SeqBAIJ    *b;
2649   PetscErrorCode ierr;
2650   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2651   PetscTruth     flg,skipallocation = PETSC_FALSE;
2652 
2653   PetscFunctionBegin;
2654 
2655   if (nz == MAT_SKIP_ALLOCATION) {
2656     skipallocation = PETSC_TRUE;
2657     nz             = 0;
2658   }
2659 
2660   if (bs < 0) {
2661     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2662       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2663     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2664     bs   = PetscAbs(bs);
2665   }
2666   if (nnz && newbs != bs) {
2667     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2668   }
2669   bs   = newbs;
2670 
2671   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2672   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2673   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2674   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2675 
2676   B->preallocated = PETSC_TRUE;
2677 
2678   mbs  = B->rmap->n/bs;
2679   nbs  = B->cmap->n/bs;
2680   bs2  = bs*bs;
2681 
2682   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2683     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2684   }
2685 
2686   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2687   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2688   if (nnz) {
2689     for (i=0; i<mbs; i++) {
2690       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2691       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2692     }
2693   }
2694 
2695   b       = (Mat_SeqBAIJ*)B->data;
2696   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2697     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2698   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2699 
2700   if (!flg) {
2701     switch (bs) {
2702     case 1:
2703       B->ops->mult            = MatMult_SeqBAIJ_1;
2704       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2705       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2706       break;
2707     case 2:
2708       B->ops->mult            = MatMult_SeqBAIJ_2;
2709       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2710       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2711       break;
2712     case 3:
2713       B->ops->mult            = MatMult_SeqBAIJ_3;
2714       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2715       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2716       break;
2717     case 4:
2718       B->ops->mult            = MatMult_SeqBAIJ_4;
2719       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2720       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2721       break;
2722     case 5:
2723       B->ops->mult            = MatMult_SeqBAIJ_5;
2724       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2725       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2726       break;
2727     case 6:
2728       B->ops->mult            = MatMult_SeqBAIJ_6;
2729       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2730       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2731       break;
2732     case 7:
2733       B->ops->mult            = MatMult_SeqBAIJ_7;
2734       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2735       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2736       break;
2737     default:
2738       B->ops->mult            = MatMult_SeqBAIJ_N;
2739       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2740       break;
2741     }
2742   }
2743   B->rmap->bs      = bs;
2744   b->mbs     = mbs;
2745   b->nbs     = nbs;
2746   if (!skipallocation) {
2747     if (!b->imax) {
2748       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2749     }
2750     /* b->ilen will count nonzeros in each block row so far. */
2751     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2752     if (!nnz) {
2753       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2754       else if (nz <= 0)        nz = 1;
2755       for (i=0; i<mbs; i++) b->imax[i] = nz;
2756       nz = nz*mbs;
2757     } else {
2758       nz = 0;
2759       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2760     }
2761 
2762     /* allocate the matrix space */
2763     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2764     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2765     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2766     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2767     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2768     b->singlemalloc = PETSC_TRUE;
2769     b->i[0] = 0;
2770     for (i=1; i<mbs+1; i++) {
2771       b->i[i] = b->i[i-1] + b->imax[i-1];
2772     }
2773     b->free_a     = PETSC_TRUE;
2774     b->free_ij    = PETSC_TRUE;
2775   } else {
2776     b->free_a     = PETSC_FALSE;
2777     b->free_ij    = PETSC_FALSE;
2778   }
2779 
2780   B->rmap->bs          = bs;
2781   b->bs2              = bs2;
2782   b->mbs              = mbs;
2783   b->nz               = 0;
2784   b->maxnz            = nz*bs2;
2785   B->info.nz_unneeded = (PetscReal)b->maxnz;
2786   PetscFunctionReturn(0);
2787 }
2788 EXTERN_C_END
2789 
2790 EXTERN_C_BEGIN
2791 #undef __FUNCT__
2792 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2793 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2794 {
2795   PetscInt       i,m,nz,nz_max=0,*nnz;
2796   PetscScalar    *values=0;
2797   PetscErrorCode ierr;
2798 
2799   PetscFunctionBegin;
2800 
2801   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2802 
2803   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2804   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2805   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2806   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2807   m = B->rmap->n/bs;
2808 
2809   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2810   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2811   for(i=0; i<m; i++) {
2812     nz = ii[i+1]- ii[i];
2813     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2814     nz_max = PetscMax(nz_max, nz);
2815     nnz[i] = nz;
2816   }
2817   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2818   ierr = PetscFree(nnz);CHKERRQ(ierr);
2819 
2820   values = (PetscScalar*)V;
2821   if (!values) {
2822     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2823     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2824   }
2825   for (i=0; i<m; i++) {
2826     PetscInt          ncols  = ii[i+1] - ii[i];
2827     const PetscInt    *icols = jj + ii[i];
2828     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2829     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2830   }
2831   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2832   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2833   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2834 
2835   PetscFunctionReturn(0);
2836 }
2837 EXTERN_C_END
2838 
2839 
2840 EXTERN_C_BEGIN
2841 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2842 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2843 EXTERN_C_END
2844 
2845 /*MC
2846    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2847    block sparse compressed row format.
2848 
2849    Options Database Keys:
2850 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2851 
2852   Level: beginner
2853 
2854 .seealso: MatCreateSeqBAIJ()
2855 M*/
2856 
2857 
2858 EXTERN_C_BEGIN
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatCreate_SeqBAIJ"
2861 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2862 {
2863   PetscErrorCode ierr;
2864   PetscMPIInt    size;
2865   Mat_SeqBAIJ    *b;
2866 
2867   PetscFunctionBegin;
2868   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2869   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2870 
2871   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2872   B->data = (void*)b;
2873   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2874   B->mapping               = 0;
2875   b->row                   = 0;
2876   b->col                   = 0;
2877   b->icol                  = 0;
2878   b->reallocs              = 0;
2879   b->saved_values          = 0;
2880 
2881   b->roworiented           = PETSC_TRUE;
2882   b->nonew                 = 0;
2883   b->diag                  = 0;
2884   b->solve_work            = 0;
2885   b->mult_work             = 0;
2886   B->spptr                 = 0;
2887   B->info.nz_unneeded      = (PetscReal)b->maxnz;
2888   b->keepnonzeropattern    = PETSC_FALSE;
2889   b->xtoy                  = 0;
2890   b->XtoY                  = 0;
2891   b->compressedrow.use     = PETSC_FALSE;
2892   b->compressedrow.nrows   = 0;
2893   b->compressedrow.i       = PETSC_NULL;
2894   b->compressedrow.rindex  = PETSC_NULL;
2895   b->compressedrow.checked = PETSC_FALSE;
2896   B->same_nonzero          = PETSC_FALSE;
2897 
2898   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2899                                      "MatGetFactorAvailable_seqbaij_petsc",
2900                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2901   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2902                                      "MatGetFactor_seqbaij_petsc",
2903                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2904   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2905                                      "MatInvertBlockDiagonal_SeqBAIJ",
2906                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2907   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2908                                      "MatStoreValues_SeqBAIJ",
2909                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2910   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2911                                      "MatRetrieveValues_SeqBAIJ",
2912                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2913   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2914                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2915                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2916   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2917                                      "MatConvert_SeqBAIJ_SeqAIJ",
2918                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2919   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2920                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2921                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2922   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2923                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2924                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2925   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2926                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2927                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2928   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2929   PetscFunctionReturn(0);
2930 }
2931 EXTERN_C_END
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2935 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2936 {
2937   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2938   PetscErrorCode ierr;
2939   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2940 
2941   PetscFunctionBegin;
2942   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2943 
2944   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2945   for (i=0; i<mbs; i++) {
2946     c->imax[i] = a->imax[i];
2947     c->ilen[i] = a->ilen[i];
2948   }
2949 
2950   /* allocate the matrix space */
2951   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2952   c->singlemalloc = PETSC_TRUE;
2953   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2954   if (mbs > 0) {
2955     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2956     if (cpvalues == MAT_COPY_VALUES) {
2957       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2958     } else {
2959       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2960     }
2961   }
2962   c->roworiented = a->roworiented;
2963   c->nonew       = a->nonew;
2964   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2965   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2966   c->bs2         = a->bs2;
2967   c->mbs         = a->mbs;
2968   c->nbs         = a->nbs;
2969 
2970   if (a->diag) {
2971     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2972     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2973     for (i=0; i<mbs; i++) {
2974       c->diag[i] = a->diag[i];
2975     }
2976   } else c->diag        = 0;
2977   c->nz                 = a->nz;
2978   c->maxnz              = a->maxnz;
2979   c->solve_work         = 0;
2980   c->mult_work          = 0;
2981   c->free_a             = PETSC_TRUE;
2982   c->free_ij            = PETSC_TRUE;
2983   C->preallocated       = PETSC_TRUE;
2984   C->assembled          = PETSC_TRUE;
2985 
2986   c->compressedrow.use     = a->compressedrow.use;
2987   c->compressedrow.nrows   = a->compressedrow.nrows;
2988   c->compressedrow.checked = a->compressedrow.checked;
2989   if ( a->compressedrow.checked && a->compressedrow.use){
2990     i = a->compressedrow.nrows;
2991     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2992     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2993     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2994     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2995   } else {
2996     c->compressedrow.use    = PETSC_FALSE;
2997     c->compressedrow.i      = PETSC_NULL;
2998     c->compressedrow.rindex = PETSC_NULL;
2999   }
3000   C->same_nonzero = A->same_nonzero;
3001   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3002   PetscFunctionReturn(0);
3003 }
3004 
3005 #undef __FUNCT__
3006 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3007 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3008 {
3009     PetscErrorCode ierr;
3010 
3011   PetscFunctionBegin;
3012   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3013   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3014   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3015   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 #undef __FUNCT__
3020 #define __FUNCT__ "MatLoad_SeqBAIJ"
3021 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3022 {
3023   Mat_SeqBAIJ    *a;
3024   Mat            B;
3025   PetscErrorCode ierr;
3026   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3027   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3028   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3029   PetscInt       *masked,nmask,tmp,bs2,ishift;
3030   PetscMPIInt    size;
3031   int            fd;
3032   PetscScalar    *aa;
3033   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3034 
3035   PetscFunctionBegin;
3036   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3037     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3038   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3039   bs2  = bs*bs;
3040 
3041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3042   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3043   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3044   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3045   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3046   M = header[1]; N = header[2]; nz = header[3];
3047 
3048   if (header[3] < 0) {
3049     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3050   }
3051 
3052   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3053 
3054   /*
3055      This code adds extra rows to make sure the number of rows is
3056     divisible by the blocksize
3057   */
3058   mbs        = M/bs;
3059   extra_rows = bs - M + bs*(mbs);
3060   if (extra_rows == bs) extra_rows = 0;
3061   else                  mbs++;
3062   if (extra_rows) {
3063     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3064   }
3065 
3066   /* read in row lengths */
3067   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3068   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3069   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3070 
3071   /* read in column indices */
3072   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3073   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3074   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3075 
3076   /* loop over row lengths determining block row lengths */
3077   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3078   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3079   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3080   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3081   masked   = mask + mbs;
3082   rowcount = 0; nzcount = 0;
3083   for (i=0; i<mbs; i++) {
3084     nmask = 0;
3085     for (j=0; j<bs; j++) {
3086       kmax = rowlengths[rowcount];
3087       for (k=0; k<kmax; k++) {
3088         tmp = jj[nzcount++]/bs;
3089         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3090       }
3091       rowcount++;
3092     }
3093     browlengths[i] += nmask;
3094     /* zero out the mask elements we set */
3095     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3096   }
3097 
3098   /* create our matrix */
3099   ierr = MatCreate(comm,&B);
3100   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3101   ierr = MatSetType(B,type);CHKERRQ(ierr);
3102   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3103   a = (Mat_SeqBAIJ*)B->data;
3104 
3105   /* set matrix "i" values */
3106   a->i[0] = 0;
3107   for (i=1; i<= mbs; i++) {
3108     a->i[i]      = a->i[i-1] + browlengths[i-1];
3109     a->ilen[i-1] = browlengths[i-1];
3110   }
3111   a->nz         = 0;
3112   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3113 
3114   /* read in nonzero values */
3115   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3116   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3117   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3118 
3119   /* set "a" and "j" values into matrix */
3120   nzcount = 0; jcount = 0;
3121   for (i=0; i<mbs; i++) {
3122     nzcountb = nzcount;
3123     nmask    = 0;
3124     for (j=0; j<bs; j++) {
3125       kmax = rowlengths[i*bs+j];
3126       for (k=0; k<kmax; k++) {
3127         tmp = jj[nzcount++]/bs;
3128 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3129       }
3130     }
3131     /* sort the masked values */
3132     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3133 
3134     /* set "j" values into matrix */
3135     maskcount = 1;
3136     for (j=0; j<nmask; j++) {
3137       a->j[jcount++]  = masked[j];
3138       mask[masked[j]] = maskcount++;
3139     }
3140     /* set "a" values into matrix */
3141     ishift = bs2*a->i[i];
3142     for (j=0; j<bs; j++) {
3143       kmax = rowlengths[i*bs+j];
3144       for (k=0; k<kmax; k++) {
3145         tmp       = jj[nzcountb]/bs ;
3146         block     = mask[tmp] - 1;
3147         point     = jj[nzcountb] - bs*tmp;
3148         idx       = ishift + bs2*block + j + bs*point;
3149         a->a[idx] = (MatScalar)aa[nzcountb++];
3150       }
3151     }
3152     /* zero out the mask elements we set */
3153     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3154   }
3155   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3156 
3157   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3158   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3159   ierr = PetscFree(aa);CHKERRQ(ierr);
3160   ierr = PetscFree(jj);CHKERRQ(ierr);
3161   ierr = PetscFree(mask);CHKERRQ(ierr);
3162 
3163   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   ierr = MatView_Private(B);CHKERRQ(ierr);
3166 
3167   *A = B;
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 #undef __FUNCT__
3172 #define __FUNCT__ "MatCreateSeqBAIJ"
3173 /*@C
3174    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3175    compressed row) format.  For good matrix assembly performance the
3176    user should preallocate the matrix storage by setting the parameter nz
3177    (or the array nnz).  By setting these parameters accurately, performance
3178    during matrix assembly can be increased by more than a factor of 50.
3179 
3180    Collective on MPI_Comm
3181 
3182    Input Parameters:
3183 +  comm - MPI communicator, set to PETSC_COMM_SELF
3184 .  bs - size of block
3185 .  m - number of rows
3186 .  n - number of columns
3187 .  nz - number of nonzero blocks  per block row (same for all rows)
3188 -  nnz - array containing the number of nonzero blocks in the various block rows
3189          (possibly different for each block row) or PETSC_NULL
3190 
3191    Output Parameter:
3192 .  A - the matrix
3193 
3194    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3195    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3196    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3197 
3198    Options Database Keys:
3199 .   -mat_no_unroll - uses code that does not unroll the loops in the
3200                      block calculations (much slower)
3201 .    -mat_block_size - size of the blocks to use
3202 
3203    Level: intermediate
3204 
3205    Notes:
3206    The number of rows and columns must be divisible by blocksize.
3207 
3208    If the nnz parameter is given then the nz parameter is ignored
3209 
3210    A nonzero block is any block that as 1 or more nonzeros in it
3211 
3212    The block AIJ format is fully compatible with standard Fortran 77
3213    storage.  That is, the stored row and column indices can begin at
3214    either one (as in Fortran) or zero.  See the users' manual for details.
3215 
3216    Specify the preallocated storage with either nz or nnz (not both).
3217    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3218    allocation.  For additional details, see the users manual chapter on
3219    matrices.
3220 
3221 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3222 @*/
3223 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3224 {
3225   PetscErrorCode ierr;
3226 
3227   PetscFunctionBegin;
3228   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3229   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3230   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3231   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 #undef __FUNCT__
3236 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3237 /*@C
3238    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3239    per row in the matrix. For good matrix assembly performance the
3240    user should preallocate the matrix storage by setting the parameter nz
3241    (or the array nnz).  By setting these parameters accurately, performance
3242    during matrix assembly can be increased by more than a factor of 50.
3243 
3244    Collective on MPI_Comm
3245 
3246    Input Parameters:
3247 +  A - the matrix
3248 .  bs - size of block
3249 .  nz - number of block nonzeros per block row (same for all rows)
3250 -  nnz - array containing the number of block nonzeros in the various block rows
3251          (possibly different for each block row) or PETSC_NULL
3252 
3253    Options Database Keys:
3254 .   -mat_no_unroll - uses code that does not unroll the loops in the
3255                      block calculations (much slower)
3256 .    -mat_block_size - size of the blocks to use
3257 
3258    Level: intermediate
3259 
3260    Notes:
3261    If the nnz parameter is given then the nz parameter is ignored
3262 
3263    You can call MatGetInfo() to get information on how effective the preallocation was;
3264    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3265    You can also run with the option -info and look for messages with the string
3266    malloc in them to see if additional memory allocation was needed.
3267 
3268    The block AIJ format is fully compatible with standard Fortran 77
3269    storage.  That is, the stored row and column indices can begin at
3270    either one (as in Fortran) or zero.  See the users' manual for details.
3271 
3272    Specify the preallocated storage with either nz or nnz (not both).
3273    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3274    allocation.  For additional details, see the users manual chapter on
3275    matrices.
3276 
3277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3278 @*/
3279 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3280 {
3281   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3282 
3283   PetscFunctionBegin;
3284   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3285   if (f) {
3286     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3287   }
3288   PetscFunctionReturn(0);
3289 }
3290 
3291 #undef __FUNCT__
3292 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3293 /*@C
3294    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3295    (the default sequential PETSc format).
3296 
3297    Collective on MPI_Comm
3298 
3299    Input Parameters:
3300 +  A - the matrix
3301 .  i - the indices into j for the start of each local row (starts with zero)
3302 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3303 -  v - optional values in the matrix
3304 
3305    Level: developer
3306 
3307 .keywords: matrix, aij, compressed row, sparse
3308 
3309 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3310 @*/
3311 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3312 {
3313   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3314 
3315   PetscFunctionBegin;
3316   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3317   if (f) {
3318     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3319   }
3320   PetscFunctionReturn(0);
3321 }
3322 
3323 
3324 #undef __FUNCT__
3325 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3326 /*@
3327      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3328               (upper triangular entries in CSR format) provided by the user.
3329 
3330      Collective on MPI_Comm
3331 
3332    Input Parameters:
3333 +  comm - must be an MPI communicator of size 1
3334 .  bs - size of block
3335 .  m - number of rows
3336 .  n - number of columns
3337 .  i - row indices
3338 .  j - column indices
3339 -  a - matrix values
3340 
3341    Output Parameter:
3342 .  mat - the matrix
3343 
3344    Level: intermediate
3345 
3346    Notes:
3347        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3348     once the matrix is destroyed
3349 
3350        You cannot set new nonzero locations into this matrix, that will generate an error.
3351 
3352        The i and j indices are 0 based
3353 
3354 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3355 
3356 @*/
3357 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3358 {
3359   PetscErrorCode ierr;
3360   PetscInt       ii;
3361   Mat_SeqBAIJ    *baij;
3362 
3363   PetscFunctionBegin;
3364   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3365   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3366 
3367   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3368   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3369   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3370   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3371   baij = (Mat_SeqBAIJ*)(*mat)->data;
3372   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3373 
3374   baij->i = i;
3375   baij->j = j;
3376   baij->a = a;
3377   baij->singlemalloc = PETSC_FALSE;
3378   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3379   baij->free_a       = PETSC_FALSE;
3380   baij->free_ij       = PETSC_FALSE;
3381 
3382   for (ii=0; ii<m; ii++) {
3383     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3384 #if defined(PETSC_USE_DEBUG)
3385     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3386 #endif
3387   }
3388 #if defined(PETSC_USE_DEBUG)
3389   for (ii=0; ii<baij->i[m]; ii++) {
3390     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3391     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3392   }
3393 #endif
3394 
3395   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3396   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3397   PetscFunctionReturn(0);
3398 }
3399