xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ee1cef2c4f92b4b45b8aa3ceac59cb944062fab9)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "../src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "../src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     case 6:
110       for (i=0; i<mbs; i++) {
111         odiag = v + 36*diag_offset[i];
112         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
113         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
114 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
115 	diag  += 36;
116 	mdiag += 36;
117       }
118       break;
119     default:
120       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
121   }
122   a->idiagvalid = PETSC_TRUE;
123   PetscFunctionReturn(0);
124 }
125 EXTERN_C_END
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
129 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,idx;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         while (nz--) {
166           idx  = (*vi++);
167           x1   = x[idx];
168           s1  -= v[0]*x1;
169           v   += 1;
170         }
171         x[i2]   = idiag[0]*s1;
172         idiag   += 1;
173         i2      += 1;
174       }
175       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
176       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
177     }
178     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
179         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
180       i2    = 0;
181       mdiag = a->idiag+a->mbs;
182       for (i=0; i<m; i++) {
183         x1      = x[i2];
184         x[i2]   = mdiag[0]*x1;
185         mdiag  += 1;
186         i2     += 1;
187       }
188       ierr = PetscLogFlops(m);CHKERRQ(ierr);
189     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
190       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
191     }
192     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
193       idiag   = a->idiag+a->mbs - 1;
194       i2      = m - 1;
195       x1      = x[i2];
196       x[i2]   = idiag[0]*x1;
197       idiag -= 1;
198       i2    -= 1;
199       for (i=m-2; i>=0; i--) {
200         v     = aa + (diag[i]+1);
201         vi    = aj + diag[i] + 1;
202         nz    = ai[i+1] - diag[i] - 1;
203         s1    = x[i2];
204         while (nz--) {
205           idx  = (*vi++);
206           x1   = x[idx];
207           s1  -= v[0]*x1;
208           v   += 1;
209         }
210         x[i2]   = idiag[0]*s1;
211         idiag   -= 2;
212         i2      -= 1;
213       }
214       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
215     }
216   } else {
217     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
218   }
219   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
220   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
221   PetscFunctionReturn(0);
222 }
223 
224 #undef __FUNCT__
225 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
226 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
227 {
228   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
229   PetscScalar        *x,x1,x2,s1,s2;
230   const PetscScalar  *b;
231   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
232   PetscErrorCode     ierr;
233   PetscInt           m = a->mbs,i,i2,nz,idx;
234   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
235 
236   PetscFunctionBegin;
237   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
238   its = its*lits;
239   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
240   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
241   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
242   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
243   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
244 
245   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
246 
247   diag  = a->diag;
248   idiag = a->idiag;
249   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
250   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
251 
252   if (flag & SOR_ZERO_INITIAL_GUESS) {
253     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
254       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
255       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
256       i2     = 2;
257       idiag += 4;
258       for (i=1; i<m; i++) {
259 	v     = aa + 4*ai[i];
260 	vi    = aj + ai[i];
261 	nz    = diag[i] - ai[i];
262 	s1    = b[i2]; s2 = b[i2+1];
263 	while (nz--) {
264 	  idx  = 2*(*vi++);
265 	  x1   = x[idx]; x2 = x[1+idx];
266 	  s1  -= v[0]*x1 + v[2]*x2;
267 	  s2  -= v[1]*x1 + v[3]*x2;
268 	  v   += 4;
269 	}
270 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
271 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
272         idiag   += 4;
273         i2      += 2;
274       }
275       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
276       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
277     }
278     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
279         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
280       i2    = 0;
281       mdiag = a->idiag+4*a->mbs;
282       for (i=0; i<m; i++) {
283         x1      = x[i2]; x2 = x[i2+1];
284         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
285         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
286         mdiag  += 4;
287         i2     += 2;
288       }
289       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
290     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
291       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
292     }
293     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
294       idiag   = a->idiag+4*a->mbs - 4;
295       i2      = 2*m - 2;
296       x1      = x[i2]; x2 = x[i2+1];
297       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
298       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
299       idiag -= 4;
300       i2    -= 2;
301       for (i=m-2; i>=0; i--) {
302 	v     = aa + 4*(diag[i]+1);
303 	vi    = aj + diag[i] + 1;
304 	nz    = ai[i+1] - diag[i] - 1;
305 	s1    = x[i2]; s2 = x[i2+1];
306 	while (nz--) {
307 	  idx  = 2*(*vi++);
308 	  x1   = x[idx]; x2 = x[1+idx];
309 	  s1  -= v[0]*x1 + v[2]*x2;
310 	  s2  -= v[1]*x1 + v[3]*x2;
311 	  v   += 4;
312 	}
313 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
314 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
315         idiag   -= 4;
316         i2      -= 2;
317       }
318       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
330 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
334   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
335   const PetscScalar  *b;
336   PetscErrorCode     ierr;
337   PetscInt           m = a->mbs,i,i2,nz,idx;
338   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
339 
340   PetscFunctionBegin;
341   its = its*lits;
342   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
343   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
344   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
345   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
346   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
347   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
348 
349   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
350 
351   diag  = a->diag;
352   idiag = a->idiag;
353   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
354   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
355 
356   if (flag & SOR_ZERO_INITIAL_GUESS) {
357     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
358       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
359       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
360       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
361       i2     = 3;
362       idiag += 9;
363       for (i=1; i<m; i++) {
364         v     = aa + 9*ai[i];
365         vi    = aj + ai[i];
366         nz    = diag[i] - ai[i];
367         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
368         while (nz--) {
369           idx  = 3*(*vi++);
370           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
371           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
372           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
373           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
374           v   += 9;
375         }
376         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
377         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
378         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
379         idiag   += 9;
380         i2      += 3;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+9*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
392         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
393         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
394         mdiag  += 9;
395         i2     += 3;
396       }
397       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
398     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
399       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
400     }
401     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
402       idiag   = a->idiag+9*a->mbs - 9;
403       i2      = 3*m - 3;
404       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
405       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
406       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
407       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
408       idiag -= 9;
409       i2    -= 3;
410       for (i=m-2; i>=0; i--) {
411         v     = aa + 9*(diag[i]+1);
412         vi    = aj + diag[i] + 1;
413         nz    = ai[i+1] - diag[i] - 1;
414         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
415         while (nz--) {
416           idx  = 3*(*vi++);
417           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
418           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
419           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
420           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
421           v   += 9;
422         }
423         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
424         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
425         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
426         idiag   -= 9;
427         i2      -= 3;
428       }
429       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
430     }
431   } else {
432     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
433   }
434   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
435   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
441 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
442 {
443   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
444   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
445   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
446   const PetscScalar  *b;
447   PetscErrorCode     ierr;
448   PetscInt           m = a->mbs,i,i2,nz,idx;
449   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
450 
451   PetscFunctionBegin;
452   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
453   its = its*lits;
454   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
455   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
456   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
457   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
458   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
459 
460   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
461 
462   diag  = a->diag;
463   idiag = a->idiag;
464   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
465   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
466 
467   if (flag & SOR_ZERO_INITIAL_GUESS) {
468     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
469       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
470       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
471       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
472       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
473       i2     = 4;
474       idiag += 16;
475       for (i=1; i<m; i++) {
476 	v     = aa + 16*ai[i];
477 	vi    = aj + ai[i];
478 	nz    = diag[i] - ai[i];
479 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
480 	while (nz--) {
481 	  idx  = 4*(*vi++);
482 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
483 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
484 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
485 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
486 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
487 	  v   += 16;
488 	}
489 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
490 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
491 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
492 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
493         idiag   += 16;
494         i2      += 4;
495       }
496       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
497       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
498     }
499     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
500         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
501       i2    = 0;
502       mdiag = a->idiag+16*a->mbs;
503       for (i=0; i<m; i++) {
504         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
505         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
506         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
507         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
508         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
509         mdiag  += 16;
510         i2     += 4;
511       }
512       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
513     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
514       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
515     }
516     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
517       idiag   = a->idiag+16*a->mbs - 16;
518       i2      = 4*m - 4;
519       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
520       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
521       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
522       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
523       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
524       idiag -= 16;
525       i2    -= 4;
526       for (i=m-2; i>=0; i--) {
527 	v     = aa + 16*(diag[i]+1);
528 	vi    = aj + diag[i] + 1;
529 	nz    = ai[i+1] - diag[i] - 1;
530 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
531 	while (nz--) {
532 	  idx  = 4*(*vi++);
533 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
534 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
535 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
536 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
537 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
538 	  v   += 16;
539 	}
540 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
541 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
542 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
543 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
544         idiag   -= 16;
545         i2      -= 4;
546       }
547       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
548     }
549   } else {
550     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
551   }
552   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
553   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
557 #undef __FUNCT__
558 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
559 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
560 {
561   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
562   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
563   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
564   const PetscScalar  *b;
565   PetscErrorCode     ierr;
566   PetscInt           m = a->mbs,i,i2,nz,idx;
567   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
568 
569   PetscFunctionBegin;
570   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
571   its = its*lits;
572   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
573   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
574   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
575   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
576   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
577 
578   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
579 
580   diag  = a->diag;
581   idiag = a->idiag;
582   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
583   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
584 
585   if (flag & SOR_ZERO_INITIAL_GUESS) {
586     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
587       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
588       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
589       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
590       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
591       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
592       i2     = 5;
593       idiag += 25;
594       for (i=1; i<m; i++) {
595 	v     = aa + 25*ai[i];
596 	vi    = aj + ai[i];
597 	nz    = diag[i] - ai[i];
598 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
599 	while (nz--) {
600 	  idx  = 5*(*vi++);
601 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
602 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
603 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
604 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
605 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
606 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
607 	  v   += 25;
608 	}
609 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
610 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
611 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
612 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
613 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
614         idiag   += 25;
615         i2      += 5;
616       }
617       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
618       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
619     }
620     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
621         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
622       i2    = 0;
623       mdiag = a->idiag+25*a->mbs;
624       for (i=0; i<m; i++) {
625         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
626         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
627         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
628         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
629         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
630         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
631         mdiag  += 25;
632         i2     += 5;
633       }
634       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
635     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
636       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
637     }
638     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
639       idiag   = a->idiag+25*a->mbs - 25;
640       i2      = 5*m - 5;
641       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
642       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
643       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
644       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
645       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
646       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
647       idiag -= 25;
648       i2    -= 5;
649       for (i=m-2; i>=0; i--) {
650 	v     = aa + 25*(diag[i]+1);
651 	vi    = aj + diag[i] + 1;
652 	nz    = ai[i+1] - diag[i] - 1;
653 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
654 	while (nz--) {
655 	  idx  = 5*(*vi++);
656 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
657 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
658 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
659 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
660 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
661 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
662 	  v   += 25;
663 	}
664 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
665 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
666 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
667 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
668 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
669         idiag   -= 25;
670         i2      -= 5;
671       }
672       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
673     }
674   } else {
675     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
676   }
677   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
678   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
679   PetscFunctionReturn(0);
680 }
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
684 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
685 {
686   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
687   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
688   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
689   const PetscScalar  *b;
690   PetscErrorCode     ierr;
691   PetscInt           m = a->mbs,i,i2,nz,idx;
692   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
693 
694   PetscFunctionBegin;
695   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
696   its = its*lits;
697   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
698   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
699   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
700   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
701   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
702 
703   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
704 
705   diag  = a->diag;
706   idiag = a->idiag;
707   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
708   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
709 
710   if (flag & SOR_ZERO_INITIAL_GUESS) {
711     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
712       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
713       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
714       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
715       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
716       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
717       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
718       i2     = 6;
719       idiag += 36;
720       for (i=1; i<m; i++) {
721         v     = aa + 36*ai[i];
722         vi    = aj + ai[i];
723         nz    = diag[i] - ai[i];
724         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
725         while (nz--) {
726           idx  = 6*(*vi++);
727           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
728           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
729           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
730           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
731           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
732           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
733           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
734           v   += 36;
735         }
736         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
737         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
738         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
739         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
740         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
741         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
742         idiag   += 36;
743         i2      += 6;
744       }
745       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
746       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
747     }
748     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
749         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
750       i2    = 0;
751       mdiag = a->idiag+36*a->mbs;
752       for (i=0; i<m; i++) {
753         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
754         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
755         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
756         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
757         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
758         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
759         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
760         mdiag  += 36;
761         i2     += 6;
762       }
763       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
764     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
765       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
766     }
767     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
768       idiag   = a->idiag+36*a->mbs - 36;
769       i2      = 6*m - 6;
770       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
771       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
772       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
773       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
774       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
775       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
776       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
777       idiag -= 36;
778       i2    -= 6;
779       for (i=m-2; i>=0; i--) {
780         v     = aa + 36*(diag[i]+1);
781         vi    = aj + diag[i] + 1;
782         nz    = ai[i+1] - diag[i] - 1;
783         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
784         while (nz--) {
785           idx  = 6*(*vi++);
786           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
787           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
788           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
789           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
790           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
791           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
792           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
793           v   += 36;
794         }
795         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
796         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
797         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
798         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
799         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
800         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
801         idiag   -= 36;
802         i2      -= 6;
803       }
804       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
805     }
806   } else {
807     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
808   }
809   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
810   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
811   PetscFunctionReturn(0);
812 }
813 
814 #undef __FUNCT__
815 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
816 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
817 {
818   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
819   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
820   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
821   const PetscScalar  *b;
822   PetscErrorCode     ierr;
823   PetscInt           m = a->mbs,i,i2,nz,idx;
824   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
825 
826   PetscFunctionBegin;
827   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
828   its = its*lits;
829   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
830   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
831   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
832   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
833   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
834 
835   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
836 
837   diag  = a->diag;
838   idiag = a->idiag;
839   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
840   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
841 
842   if (flag & SOR_ZERO_INITIAL_GUESS) {
843     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
844       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
845       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
846       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
847       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
848       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
849       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
850       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
851       i2     = 7;
852       idiag += 49;
853       for (i=1; i<m; i++) {
854         v     = aa + 49*ai[i];
855         vi    = aj + ai[i];
856         nz    = diag[i] - ai[i];
857         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
858         while (nz--) {
859           idx  = 7*(*vi++);
860           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
861           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
862           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
863           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
864           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
865           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
866           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
867           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
868           v   += 49;
869         }
870         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
871         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
872         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
873         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
874         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
875         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
876         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
877         idiag   += 49;
878         i2      += 7;
879       }
880       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
881       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
882     }
883     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
884         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
885       i2    = 0;
886       mdiag = a->idiag+49*a->mbs;
887       for (i=0; i<m; i++) {
888         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
889         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
890         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
891         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
892         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
893         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
894         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
895         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
896         mdiag  += 36;
897         i2     += 6;
898       }
899       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
900     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
901       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
902     }
903     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
904       idiag   = a->idiag+49*a->mbs - 49;
905       i2      = 7*m - 7;
906       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
907       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
908       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
909       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
910       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
911       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
912       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
913       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
914       idiag -= 49;
915       i2    -= 7;
916       for (i=m-2; i>=0; i--) {
917         v     = aa + 49*(diag[i]+1);
918         vi    = aj + diag[i] + 1;
919         nz    = ai[i+1] - diag[i] - 1;
920         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
921         while (nz--) {
922           idx  = 7*(*vi++);
923           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
924           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
925           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
926           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
927           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
928           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
929           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
930           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
931           v   += 49;
932         }
933         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
934         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
935         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
936         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
937         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
938         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
939         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
940         idiag   -= 49;
941         i2      -= 7;
942       }
943       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
944     }
945   } else {
946     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
947   }
948   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
949   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 /*
954     Special version for direct calls from Fortran (Used in PETSc-fun3d)
955 */
956 #if defined(PETSC_HAVE_FORTRAN_CAPS)
957 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
958 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
959 #define matsetvaluesblocked4_ matsetvaluesblocked4
960 #endif
961 
962 EXTERN_C_BEGIN
963 #undef __FUNCT__
964 #define __FUNCT__ "matsetvaluesblocked4_"
965 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
966 {
967   Mat               A = *AA;
968   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
969   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
970   PetscInt          *ai=a->i,*ailen=a->ilen;
971   PetscInt          *aj=a->j,stepval,lastcol = -1;
972   const PetscScalar *value = v;
973   MatScalar         *ap,*aa = a->a,*bap;
974 
975   PetscFunctionBegin;
976   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
977   stepval = (n-1)*4;
978   for (k=0; k<m; k++) { /* loop over added rows */
979     row  = im[k];
980     rp   = aj + ai[row];
981     ap   = aa + 16*ai[row];
982     nrow = ailen[row];
983     low  = 0;
984     high = nrow;
985     for (l=0; l<n; l++) { /* loop over added columns */
986       col = in[l];
987       if (col <= lastcol) low = 0; else high = nrow;
988       lastcol = col;
989       value = v + k*(stepval+4 + l)*4;
990       while (high-low > 7) {
991         t = (low+high)/2;
992         if (rp[t] > col) high = t;
993         else             low  = t;
994       }
995       for (i=low; i<high; i++) {
996         if (rp[i] > col) break;
997         if (rp[i] == col) {
998           bap  = ap +  16*i;
999           for (ii=0; ii<4; ii++,value+=stepval) {
1000             for (jj=ii; jj<16; jj+=4) {
1001               bap[jj] += *value++;
1002             }
1003           }
1004           goto noinsert2;
1005         }
1006       }
1007       N = nrow++ - 1;
1008       high++; /* added new column index thus must search to one higher than before */
1009       /* shift up all the later entries in this row */
1010       for (ii=N; ii>=i; ii--) {
1011         rp[ii+1] = rp[ii];
1012         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1013       }
1014       if (N >= i) {
1015         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1016       }
1017       rp[i] = col;
1018       bap   = ap +  16*i;
1019       for (ii=0; ii<4; ii++,value+=stepval) {
1020         for (jj=ii; jj<16; jj+=4) {
1021           bap[jj] = *value++;
1022         }
1023       }
1024       noinsert2:;
1025       low = i;
1026     }
1027     ailen[row] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 EXTERN_C_END
1032 
1033 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1034 #define matsetvalues4_ MATSETVALUES4
1035 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1036 #define matsetvalues4_ matsetvalues4
1037 #endif
1038 
1039 EXTERN_C_BEGIN
1040 #undef __FUNCT__
1041 #define __FUNCT__ "MatSetValues4_"
1042 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1043 {
1044   Mat         A = *AA;
1045   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1046   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1047   PetscInt    *ai=a->i,*ailen=a->ilen;
1048   PetscInt    *aj=a->j,brow,bcol;
1049   PetscInt    ridx,cidx,lastcol = -1;
1050   MatScalar   *ap,value,*aa=a->a,*bap;
1051 
1052   PetscFunctionBegin;
1053   for (k=0; k<m; k++) { /* loop over added rows */
1054     row  = im[k]; brow = row/4;
1055     rp   = aj + ai[brow];
1056     ap   = aa + 16*ai[brow];
1057     nrow = ailen[brow];
1058     low  = 0;
1059     high = nrow;
1060     for (l=0; l<n; l++) { /* loop over added columns */
1061       col = in[l]; bcol = col/4;
1062       ridx = row % 4; cidx = col % 4;
1063       value = v[l + k*n];
1064       if (col <= lastcol) low = 0; else high = nrow;
1065       lastcol = col;
1066       while (high-low > 7) {
1067         t = (low+high)/2;
1068         if (rp[t] > bcol) high = t;
1069         else              low  = t;
1070       }
1071       for (i=low; i<high; i++) {
1072         if (rp[i] > bcol) break;
1073         if (rp[i] == bcol) {
1074           bap  = ap +  16*i + 4*cidx + ridx;
1075           *bap += value;
1076           goto noinsert1;
1077         }
1078       }
1079       N = nrow++ - 1;
1080       high++; /* added new column thus must search to one higher than before */
1081       /* shift up all the later entries in this row */
1082       for (ii=N; ii>=i; ii--) {
1083         rp[ii+1] = rp[ii];
1084         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1085       }
1086       if (N>=i) {
1087         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1088       }
1089       rp[i]                    = bcol;
1090       ap[16*i + 4*cidx + ridx] = value;
1091       noinsert1:;
1092       low = i;
1093     }
1094     ailen[brow] = nrow;
1095   }
1096   PetscFunctionReturnVoid();
1097 }
1098 EXTERN_C_END
1099 
1100 /*
1101      Checks for missing diagonals
1102 */
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1105 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1106 {
1107   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109   PetscInt       *diag,*jj = a->j,i;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1113   *missing = PETSC_FALSE;
1114   if (A->rmap->n > 0 && !jj) {
1115     *missing  = PETSC_TRUE;
1116     if (d) *d = 0;
1117     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1118   } else {
1119     diag     = a->diag;
1120     for (i=0; i<a->mbs; i++) {
1121       if (jj[diag[i]] != i) {
1122         *missing  = PETSC_TRUE;
1123         if (d) *d = i;
1124         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1125       }
1126     }
1127   }
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1133 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1134 {
1135   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1136   PetscErrorCode ierr;
1137   PetscInt       i,j,m = a->mbs;
1138 
1139   PetscFunctionBegin;
1140   if (!a->diag) {
1141     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1142   }
1143   for (i=0; i<m; i++) {
1144     a->diag[i] = a->i[i+1];
1145     for (j=a->i[i]; j<a->i[i+1]; j++) {
1146       if (a->j[j] == i) {
1147         a->diag[i] = j;
1148         break;
1149       }
1150     }
1151   }
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 
1156 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1160 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1161 {
1162   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1163   PetscErrorCode ierr;
1164   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1165   PetscInt       *tia, *tja;
1166 
1167   PetscFunctionBegin;
1168   *nn = n;
1169   if (!ia) PetscFunctionReturn(0);
1170   if (symmetric) {
1171     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1172   } else {
1173     tia = a->i; tja = a->j;
1174   }
1175 
1176   if (!blockcompressed && bs > 1) {
1177     (*nn) *= bs;
1178     nbs    = bs;
1179     /* malloc & create the natural set of indices */
1180     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1181     if (n) {
1182       (*ia)[0] = 0;
1183       for (j=1; j<bs; j++) {
1184         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1185       }
1186     }
1187 
1188     for (i=1; i<n; i++) {
1189       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1190       for (j=1; j<bs; j++) {
1191         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1192       }
1193     }
1194     if (n) {
1195       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1196     }
1197 
1198     if (ja) {
1199       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1200       cnt = 0;
1201       for (i=0; i<n; i++) {
1202         for (j=0; j<bs; j++) {
1203           for (k=tia[i]; k<tia[i+1]; k++) {
1204             for (l=0; l<bs; l++) {
1205               (*ja)[cnt++] = bs*tja[k] + l;
1206 	    }
1207           }
1208         }
1209       }
1210     }
1211 
1212     n     *= bs;
1213     nz *= bs*bs;
1214     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1215       ierr = PetscFree(tia);CHKERRQ(ierr);
1216       ierr = PetscFree(tja);CHKERRQ(ierr);
1217     }
1218   } else {
1219     *ia = tia;
1220     if (ja) *ja = tja;
1221   }
1222   if (oshift == 1) {
1223     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1224     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1231 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1232 {
1233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1234   PetscErrorCode ierr;
1235   PetscInt       i,n = a->mbs,nz = a->i[n];
1236 
1237   PetscFunctionBegin;
1238   if (!ia) PetscFunctionReturn(0);
1239   if (!blockcompressed && A->rmap->bs > 1) {
1240     ierr = PetscFree(*ia);CHKERRQ(ierr);
1241     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1242   } else if (symmetric) {
1243     ierr = PetscFree(*ia);CHKERRQ(ierr);
1244     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1245   } else if (oshift == 1) { /* blockcompressed */
1246     for (i=0; i<n+1; i++) a->i[i]--;
1247     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1254 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1255 {
1256   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260 #if defined(PETSC_USE_LOG)
1261   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1262 #endif
1263   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1264   if (a->row) {
1265     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1266   }
1267   if (a->col) {
1268     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1269   }
1270   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1271   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1272   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1273   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1274   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1275   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1276   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1277   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1278   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1279 
1280   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1281   ierr = PetscFree(a);CHKERRQ(ierr);
1282 
1283   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1297 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1298 {
1299   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1300   PetscErrorCode ierr;
1301 
1302   PetscFunctionBegin;
1303   switch (op) {
1304   case MAT_ROW_ORIENTED:
1305     a->roworiented    = flg;
1306     break;
1307   case MAT_KEEP_NONZERO_PATTERN:
1308     a->keepnonzeropattern = flg;
1309     break;
1310   case MAT_NEW_NONZERO_LOCATIONS:
1311     a->nonew          = (flg ? 0 : 1);
1312     break;
1313   case MAT_NEW_NONZERO_LOCATION_ERR:
1314     a->nonew          = (flg ? -1 : 0);
1315     break;
1316   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1317     a->nonew          = (flg ? -2 : 0);
1318     break;
1319   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1320     a->nounused       = (flg ? -1 : 0);
1321     break;
1322   case MAT_NEW_DIAGONALS:
1323   case MAT_IGNORE_OFF_PROC_ENTRIES:
1324   case MAT_USE_HASH_TABLE:
1325     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1326     break;
1327   case MAT_SYMMETRIC:
1328   case MAT_STRUCTURALLY_SYMMETRIC:
1329   case MAT_HERMITIAN:
1330   case MAT_SYMMETRY_ETERNAL:
1331     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1332     break;
1333   default:
1334     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1335   }
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1341 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1346   MatScalar      *aa,*aa_i;
1347   PetscScalar    *v_i;
1348 
1349   PetscFunctionBegin;
1350   bs  = A->rmap->bs;
1351   ai  = a->i;
1352   aj  = a->j;
1353   aa  = a->a;
1354   bs2 = a->bs2;
1355 
1356   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1357 
1358   bn  = row/bs;   /* Block number */
1359   bp  = row % bs; /* Block Position */
1360   M   = ai[bn+1] - ai[bn];
1361   *nz = bs*M;
1362 
1363   if (v) {
1364     *v = 0;
1365     if (*nz) {
1366       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1367       for (i=0; i<M; i++) { /* for each block in the block row */
1368         v_i  = *v + i*bs;
1369         aa_i = aa + bs2*(ai[bn] + i);
1370         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1371       }
1372     }
1373   }
1374 
1375   if (idx) {
1376     *idx = 0;
1377     if (*nz) {
1378       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1379       for (i=0; i<M; i++) { /* for each block in the block row */
1380         idx_i = *idx + i*bs;
1381         itmp  = bs*aj[ai[bn] + i];
1382         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1383       }
1384     }
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1391 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1392 {
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1397   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1403 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1404 {
1405   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1406   Mat            C;
1407   PetscErrorCode ierr;
1408   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1409   PetscInt       *rows,*cols,bs2=a->bs2;
1410   MatScalar      *array;
1411 
1412   PetscFunctionBegin;
1413   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1414   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1415     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1416     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1417 
1418     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1419     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1420     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1421     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1422     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1423     ierr = PetscFree(col);CHKERRQ(ierr);
1424   } else {
1425     C = *B;
1426   }
1427 
1428   array = a->a;
1429   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1430   cols = rows + bs;
1431   for (i=0; i<mbs; i++) {
1432     cols[0] = i*bs;
1433     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1434     len = ai[i+1] - ai[i];
1435     for (j=0; j<len; j++) {
1436       rows[0] = (*aj++)*bs;
1437       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1438       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1439       array += bs2;
1440     }
1441   }
1442   ierr = PetscFree(rows);CHKERRQ(ierr);
1443 
1444   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1445   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446 
1447   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1448     *B = C;
1449   } else {
1450     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1457 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1458 {
1459   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1460   PetscErrorCode ierr;
1461   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1462   int            fd;
1463   PetscScalar    *aa;
1464   FILE           *file;
1465 
1466   PetscFunctionBegin;
1467   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1468   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1469   col_lens[0] = MAT_FILE_COOKIE;
1470 
1471   col_lens[1] = A->rmap->N;
1472   col_lens[2] = A->cmap->n;
1473   col_lens[3] = a->nz*bs2;
1474 
1475   /* store lengths of each row and write (including header) to file */
1476   count = 0;
1477   for (i=0; i<a->mbs; i++) {
1478     for (j=0; j<bs; j++) {
1479       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1480     }
1481   }
1482   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1483   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1484 
1485   /* store column indices (zero start index) */
1486   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1487   count = 0;
1488   for (i=0; i<a->mbs; i++) {
1489     for (j=0; j<bs; j++) {
1490       for (k=a->i[i]; k<a->i[i+1]; k++) {
1491         for (l=0; l<bs; l++) {
1492           jj[count++] = bs*a->j[k] + l;
1493         }
1494       }
1495     }
1496   }
1497   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1498   ierr = PetscFree(jj);CHKERRQ(ierr);
1499 
1500   /* store nonzero values */
1501   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1502   count = 0;
1503   for (i=0; i<a->mbs; i++) {
1504     for (j=0; j<bs; j++) {
1505       for (k=a->i[i]; k<a->i[i+1]; k++) {
1506         for (l=0; l<bs; l++) {
1507           aa[count++] = a->a[bs2*k + l*bs + j];
1508         }
1509       }
1510     }
1511   }
1512   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1513   ierr = PetscFree(aa);CHKERRQ(ierr);
1514 
1515   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1516   if (file) {
1517     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1524 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1525 {
1526   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1527   PetscErrorCode    ierr;
1528   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1529   PetscViewerFormat format;
1530 
1531   PetscFunctionBegin;
1532   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1533   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1534     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1535   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1536     Mat aij;
1537     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1538     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1539     ierr = MatDestroy(aij);CHKERRQ(ierr);
1540   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1541      PetscFunctionReturn(0);
1542   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1543     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1544     for (i=0; i<a->mbs; i++) {
1545       for (j=0; j<bs; j++) {
1546         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1547         for (k=a->i[i]; k<a->i[i+1]; k++) {
1548           for (l=0; l<bs; l++) {
1549 #if defined(PETSC_USE_COMPLEX)
1550             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1552                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1554               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1555                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1556             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1557               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1558             }
1559 #else
1560             if (a->a[bs2*k + l*bs + j] != 0.0) {
1561               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1562             }
1563 #endif
1564           }
1565         }
1566         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1567       }
1568     }
1569     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1570   } else {
1571     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1572     for (i=0; i<a->mbs; i++) {
1573       for (j=0; j<bs; j++) {
1574         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1575         for (k=a->i[i]; k<a->i[i+1]; k++) {
1576           for (l=0; l<bs; l++) {
1577 #if defined(PETSC_USE_COMPLEX)
1578             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1579               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1580                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1581             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1582               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1583                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1584             } else {
1585               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1586             }
1587 #else
1588             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1589 #endif
1590           }
1591         }
1592         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1593       }
1594     }
1595     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1596   }
1597   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1603 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1604 {
1605   Mat            A = (Mat) Aa;
1606   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1607   PetscErrorCode ierr;
1608   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1609   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1610   MatScalar      *aa;
1611   PetscViewer    viewer;
1612 
1613   PetscFunctionBegin;
1614 
1615   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1616   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1617 
1618   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1619 
1620   /* loop over matrix elements drawing boxes */
1621   color = PETSC_DRAW_BLUE;
1622   for (i=0,row=0; i<mbs; i++,row+=bs) {
1623     for (j=a->i[i]; j<a->i[i+1]; j++) {
1624       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1625       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1626       aa = a->a + j*bs2;
1627       for (k=0; k<bs; k++) {
1628         for (l=0; l<bs; l++) {
1629           if (PetscRealPart(*aa++) >=  0.) continue;
1630           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1631         }
1632       }
1633     }
1634   }
1635   color = PETSC_DRAW_CYAN;
1636   for (i=0,row=0; i<mbs; i++,row+=bs) {
1637     for (j=a->i[i]; j<a->i[i+1]; j++) {
1638       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1639       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1640       aa = a->a + j*bs2;
1641       for (k=0; k<bs; k++) {
1642         for (l=0; l<bs; l++) {
1643           if (PetscRealPart(*aa++) != 0.) continue;
1644           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1645         }
1646       }
1647     }
1648   }
1649 
1650   color = PETSC_DRAW_RED;
1651   for (i=0,row=0; i<mbs; i++,row+=bs) {
1652     for (j=a->i[i]; j<a->i[i+1]; j++) {
1653       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1654       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1655       aa = a->a + j*bs2;
1656       for (k=0; k<bs; k++) {
1657         for (l=0; l<bs; l++) {
1658           if (PetscRealPart(*aa++) <= 0.) continue;
1659           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1660         }
1661       }
1662     }
1663   }
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1669 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1670 {
1671   PetscErrorCode ierr;
1672   PetscReal      xl,yl,xr,yr,w,h;
1673   PetscDraw      draw;
1674   PetscTruth     isnull;
1675 
1676   PetscFunctionBegin;
1677 
1678   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1679   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1680 
1681   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1682   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1683   xr += w;    yr += h;  xl = -w;     yl = -h;
1684   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1685   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1686   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatView_SeqBAIJ"
1692 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1693 {
1694   PetscErrorCode ierr;
1695   PetscTruth     iascii,isbinary,isdraw;
1696 
1697   PetscFunctionBegin;
1698   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1699   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1700   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1701   if (iascii){
1702     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1703   } else if (isbinary) {
1704     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1705   } else if (isdraw) {
1706     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1707   } else {
1708     Mat B;
1709     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1710     ierr = MatView(B,viewer);CHKERRQ(ierr);
1711     ierr = MatDestroy(B);CHKERRQ(ierr);
1712   }
1713   PetscFunctionReturn(0);
1714 }
1715 
1716 
1717 #undef __FUNCT__
1718 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1719 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1720 {
1721   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1722   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1723   PetscInt    *ai = a->i,*ailen = a->ilen;
1724   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1725   MatScalar   *ap,*aa = a->a;
1726 
1727   PetscFunctionBegin;
1728   for (k=0; k<m; k++) { /* loop over rows */
1729     row  = im[k]; brow = row/bs;
1730     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1731     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1732     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1733     nrow = ailen[brow];
1734     for (l=0; l<n; l++) { /* loop over columns */
1735       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1736       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1737       col  = in[l] ;
1738       bcol = col/bs;
1739       cidx = col%bs;
1740       ridx = row%bs;
1741       high = nrow;
1742       low  = 0; /* assume unsorted */
1743       while (high-low > 5) {
1744         t = (low+high)/2;
1745         if (rp[t] > bcol) high = t;
1746         else             low  = t;
1747       }
1748       for (i=low; i<high; i++) {
1749         if (rp[i] > bcol) break;
1750         if (rp[i] == bcol) {
1751           *v++ = ap[bs2*i+bs*cidx+ridx];
1752           goto finished;
1753         }
1754       }
1755       *v++ = 0.0;
1756       finished:;
1757     }
1758   }
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #define CHUNKSIZE 10
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1765 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1766 {
1767   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1768   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1769   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1770   PetscErrorCode    ierr;
1771   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1772   PetscTruth        roworiented=a->roworiented;
1773   const PetscScalar *value = v;
1774   MatScalar         *ap,*aa = a->a,*bap;
1775 
1776   PetscFunctionBegin;
1777   if (roworiented) {
1778     stepval = (n-1)*bs;
1779   } else {
1780     stepval = (m-1)*bs;
1781   }
1782   for (k=0; k<m; k++) { /* loop over added rows */
1783     row  = im[k];
1784     if (row < 0) continue;
1785 #if defined(PETSC_USE_DEBUG)
1786     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1787 #endif
1788     rp   = aj + ai[row];
1789     ap   = aa + bs2*ai[row];
1790     rmax = imax[row];
1791     nrow = ailen[row];
1792     low  = 0;
1793     high = nrow;
1794     for (l=0; l<n; l++) { /* loop over added columns */
1795       if (in[l] < 0) continue;
1796 #if defined(PETSC_USE_DEBUG)
1797       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1798 #endif
1799       col = in[l];
1800       if (roworiented) {
1801         value = v + k*(stepval+bs)*bs + l*bs;
1802       } else {
1803         value = v + l*(stepval+bs)*bs + k*bs;
1804       }
1805       if (col <= lastcol) low = 0; else high = nrow;
1806       lastcol = col;
1807       while (high-low > 7) {
1808         t = (low+high)/2;
1809         if (rp[t] > col) high = t;
1810         else             low  = t;
1811       }
1812       for (i=low; i<high; i++) {
1813         if (rp[i] > col) break;
1814         if (rp[i] == col) {
1815           bap  = ap +  bs2*i;
1816           if (roworiented) {
1817             if (is == ADD_VALUES) {
1818               for (ii=0; ii<bs; ii++,value+=stepval) {
1819                 for (jj=ii; jj<bs2; jj+=bs) {
1820                   bap[jj] += *value++;
1821                 }
1822               }
1823             } else {
1824               for (ii=0; ii<bs; ii++,value+=stepval) {
1825                 for (jj=ii; jj<bs2; jj+=bs) {
1826                   bap[jj] = *value++;
1827                 }
1828               }
1829             }
1830           } else {
1831             if (is == ADD_VALUES) {
1832               for (ii=0; ii<bs; ii++,value+=stepval) {
1833                 for (jj=0; jj<bs; jj++) {
1834                   *bap++ += *value++;
1835                 }
1836               }
1837             } else {
1838               for (ii=0; ii<bs; ii++,value+=stepval) {
1839                 for (jj=0; jj<bs; jj++) {
1840                   *bap++  = *value++;
1841                 }
1842               }
1843             }
1844           }
1845           goto noinsert2;
1846         }
1847       }
1848       if (nonew == 1) goto noinsert2;
1849       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1850       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1851       N = nrow++ - 1; high++;
1852       /* shift up all the later entries in this row */
1853       for (ii=N; ii>=i; ii--) {
1854         rp[ii+1] = rp[ii];
1855         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1856       }
1857       if (N >= i) {
1858         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1859       }
1860       rp[i] = col;
1861       bap   = ap +  bs2*i;
1862       if (roworiented) {
1863         for (ii=0; ii<bs; ii++,value+=stepval) {
1864           for (jj=ii; jj<bs2; jj+=bs) {
1865             bap[jj] = *value++;
1866           }
1867         }
1868       } else {
1869         for (ii=0; ii<bs; ii++,value+=stepval) {
1870           for (jj=0; jj<bs; jj++) {
1871             *bap++  = *value++;
1872           }
1873         }
1874       }
1875       noinsert2:;
1876       low = i;
1877     }
1878     ailen[row] = nrow;
1879   }
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1885 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1886 {
1887   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1888   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1889   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1890   PetscErrorCode ierr;
1891   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1892   MatScalar      *aa = a->a,*ap;
1893   PetscReal      ratio=0.6;
1894 
1895   PetscFunctionBegin;
1896   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1897 
1898   if (m) rmax = ailen[0];
1899   for (i=1; i<mbs; i++) {
1900     /* move each row back by the amount of empty slots (fshift) before it*/
1901     fshift += imax[i-1] - ailen[i-1];
1902     rmax   = PetscMax(rmax,ailen[i]);
1903     if (fshift) {
1904       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1905       N = ailen[i];
1906       for (j=0; j<N; j++) {
1907         ip[j-fshift] = ip[j];
1908         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1909       }
1910     }
1911     ai[i] = ai[i-1] + ailen[i-1];
1912   }
1913   if (mbs) {
1914     fshift += imax[mbs-1] - ailen[mbs-1];
1915     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1916   }
1917   /* reset ilen and imax for each row */
1918   for (i=0; i<mbs; i++) {
1919     ailen[i] = imax[i] = ai[i+1] - ai[i];
1920   }
1921   a->nz = ai[mbs];
1922 
1923   /* diagonals may have moved, so kill the diagonal pointers */
1924   a->idiagvalid = PETSC_FALSE;
1925   if (fshift && a->diag) {
1926     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1927     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1928     a->diag = 0;
1929   }
1930   if (fshift && a->nounused == -1) {
1931     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1932   }
1933   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1934   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1935   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1936   a->reallocs          = 0;
1937   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1938 
1939   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1940   if (a->compressedrow.use){
1941     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1942   }
1943 
1944   A->same_nonzero = PETSC_TRUE;
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 /*
1949    This function returns an array of flags which indicate the locations of contiguous
1950    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1951    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1952    Assume: sizes should be long enough to hold all the values.
1953 */
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1956 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1957 {
1958   PetscInt   i,j,k,row;
1959   PetscTruth flg;
1960 
1961   PetscFunctionBegin;
1962   for (i=0,j=0; i<n; j++) {
1963     row = idx[i];
1964     if (row%bs!=0) { /* Not the begining of a block */
1965       sizes[j] = 1;
1966       i++;
1967     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1968       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1969       i++;
1970     } else { /* Begining of the block, so check if the complete block exists */
1971       flg = PETSC_TRUE;
1972       for (k=1; k<bs; k++) {
1973         if (row+k != idx[i+k]) { /* break in the block */
1974           flg = PETSC_FALSE;
1975           break;
1976         }
1977       }
1978       if (flg) { /* No break in the bs */
1979         sizes[j] = bs;
1980         i+= bs;
1981       } else {
1982         sizes[j] = 1;
1983         i++;
1984       }
1985     }
1986   }
1987   *bs_max = j;
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1993 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1994 {
1995   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1996   PetscErrorCode ierr;
1997   PetscInt       i,j,k,count,*rows;
1998   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1999   PetscScalar    zero = 0.0;
2000   MatScalar      *aa;
2001 
2002   PetscFunctionBegin;
2003   /* Make a copy of the IS and  sort it */
2004   /* allocate memory for rows,sizes */
2005   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2006   sizes = rows + is_n;
2007 
2008   /* copy IS values to rows, and sort them */
2009   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2010   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2011   if (baij->keepnonzeropattern) {
2012     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2013     bs_max = is_n;
2014     A->same_nonzero = PETSC_TRUE;
2015   } else {
2016     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2017     A->same_nonzero = PETSC_FALSE;
2018   }
2019 
2020   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2021     row   = rows[j];
2022     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2023     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2024     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2025     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2026       if (diag != 0.0) {
2027         if (baij->ilen[row/bs] > 0) {
2028           baij->ilen[row/bs]       = 1;
2029           baij->j[baij->i[row/bs]] = row/bs;
2030           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2031         }
2032         /* Now insert all the diagonal values for this bs */
2033         for (k=0; k<bs; k++) {
2034           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2035         }
2036       } else { /* (diag == 0.0) */
2037         baij->ilen[row/bs] = 0;
2038       } /* end (diag == 0.0) */
2039     } else { /* (sizes[i] != bs) */
2040 #if defined (PETSC_USE_DEBUG)
2041       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2042 #endif
2043       for (k=0; k<count; k++) {
2044         aa[0] =  zero;
2045         aa    += bs;
2046       }
2047       if (diag != 0.0) {
2048         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2049       }
2050     }
2051   }
2052 
2053   ierr = PetscFree(rows);CHKERRQ(ierr);
2054   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2060 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2061 {
2062   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2063   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2064   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2065   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2066   PetscErrorCode ierr;
2067   PetscInt       ridx,cidx,bs2=a->bs2;
2068   PetscTruth     roworiented=a->roworiented;
2069   MatScalar      *ap,value,*aa=a->a,*bap;
2070 
2071   PetscFunctionBegin;
2072   for (k=0; k<m; k++) { /* loop over added rows */
2073     row  = im[k];
2074     brow = row/bs;
2075     if (row < 0) continue;
2076 #if defined(PETSC_USE_DEBUG)
2077     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2078 #endif
2079     rp   = aj + ai[brow];
2080     ap   = aa + bs2*ai[brow];
2081     rmax = imax[brow];
2082     nrow = ailen[brow];
2083     low  = 0;
2084     high = nrow;
2085     for (l=0; l<n; l++) { /* loop over added columns */
2086       if (in[l] < 0) continue;
2087 #if defined(PETSC_USE_DEBUG)
2088       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2089 #endif
2090       col = in[l]; bcol = col/bs;
2091       ridx = row % bs; cidx = col % bs;
2092       if (roworiented) {
2093         value = v[l + k*n];
2094       } else {
2095         value = v[k + l*m];
2096       }
2097       if (col <= lastcol) low = 0; else high = nrow;
2098       lastcol = col;
2099       while (high-low > 7) {
2100         t = (low+high)/2;
2101         if (rp[t] > bcol) high = t;
2102         else              low  = t;
2103       }
2104       for (i=low; i<high; i++) {
2105         if (rp[i] > bcol) break;
2106         if (rp[i] == bcol) {
2107           bap  = ap +  bs2*i + bs*cidx + ridx;
2108           if (is == ADD_VALUES) *bap += value;
2109           else                  *bap  = value;
2110           goto noinsert1;
2111         }
2112       }
2113       if (nonew == 1) goto noinsert1;
2114       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2115       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2116       N = nrow++ - 1; high++;
2117       /* shift up all the later entries in this row */
2118       for (ii=N; ii>=i; ii--) {
2119         rp[ii+1] = rp[ii];
2120         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2121       }
2122       if (N>=i) {
2123         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2124       }
2125       rp[i]                      = bcol;
2126       ap[bs2*i + bs*cidx + ridx] = value;
2127       a->nz++;
2128       noinsert1:;
2129       low = i;
2130     }
2131     ailen[brow] = nrow;
2132   }
2133   A->same_nonzero = PETSC_FALSE;
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2141 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2142 {
2143   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2144   Mat            outA;
2145   PetscErrorCode ierr;
2146   PetscTruth     row_identity,col_identity;
2147 
2148   PetscFunctionBegin;
2149   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2150   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2151   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2152   if (!row_identity || !col_identity) {
2153     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2154   }
2155 
2156   outA          = inA;
2157   inA->factor   = MAT_FACTOR_LU;
2158 
2159   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2160 
2161   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2162   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2163   a->row = row;
2164   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2165   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2166   a->col = col;
2167 
2168   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2169   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2170   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2171 
2172   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2173   if (!a->solve_work) {
2174     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2175     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2176   }
2177   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2178 
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 EXTERN_C_BEGIN
2183 #undef __FUNCT__
2184 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2185 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2186 {
2187   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2188   PetscInt    i,nz,mbs;
2189 
2190   PetscFunctionBegin;
2191   nz  = baij->maxnz/baij->bs2;
2192   mbs = baij->mbs;
2193   for (i=0; i<nz; i++) {
2194     baij->j[i] = indices[i];
2195   }
2196   baij->nz = nz;
2197   for (i=0; i<mbs; i++) {
2198     baij->ilen[i] = baij->imax[i];
2199   }
2200   PetscFunctionReturn(0);
2201 }
2202 EXTERN_C_END
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2206 /*@
2207     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2208        in the matrix.
2209 
2210   Input Parameters:
2211 +  mat - the SeqBAIJ matrix
2212 -  indices - the column indices
2213 
2214   Level: advanced
2215 
2216   Notes:
2217     This can be called if you have precomputed the nonzero structure of the
2218   matrix and want to provide it to the matrix object to improve the performance
2219   of the MatSetValues() operation.
2220 
2221     You MUST have set the correct numbers of nonzeros per row in the call to
2222   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2223 
2224     MUST be called before any calls to MatSetValues();
2225 
2226 @*/
2227 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2228 {
2229   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2230 
2231   PetscFunctionBegin;
2232   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2233   PetscValidPointer(indices,2);
2234   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2235   if (f) {
2236     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2237   } else {
2238     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2239   }
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2245 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2246 {
2247   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2248   PetscErrorCode ierr;
2249   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2250   PetscReal      atmp;
2251   PetscScalar    *x,zero = 0.0;
2252   MatScalar      *aa;
2253   PetscInt       ncols,brow,krow,kcol;
2254 
2255   PetscFunctionBegin;
2256   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2257   bs   = A->rmap->bs;
2258   aa   = a->a;
2259   ai   = a->i;
2260   aj   = a->j;
2261   mbs  = a->mbs;
2262 
2263   ierr = VecSet(v,zero);CHKERRQ(ierr);
2264   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2265   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2266   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2267   for (i=0; i<mbs; i++) {
2268     ncols = ai[1] - ai[0]; ai++;
2269     brow  = bs*i;
2270     for (j=0; j<ncols; j++){
2271       for (kcol=0; kcol<bs; kcol++){
2272         for (krow=0; krow<bs; krow++){
2273           atmp = PetscAbsScalar(*aa);aa++;
2274           row = brow + krow;    /* row index */
2275           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2276           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2277         }
2278       }
2279       aj++;
2280     }
2281   }
2282   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatCopy_SeqBAIJ"
2288 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2289 {
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   /* If the two matrices have the same copy implementation, use fast copy. */
2294   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2295     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2296     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2297 
2298     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2299       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2300     }
2301     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2302   } else {
2303     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2304   }
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2310 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2311 {
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2321 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2322 {
2323   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2324   PetscFunctionBegin;
2325   *array = a->a;
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2331 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2332 {
2333   PetscFunctionBegin;
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #include "petscblaslapack.h"
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2340 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2341 {
2342   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2345   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2346 
2347   PetscFunctionBegin;
2348   if (str == SAME_NONZERO_PATTERN) {
2349     PetscScalar alpha = a;
2350     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2351   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2352     if (y->xtoy && y->XtoY != X) {
2353       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2354       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2355     }
2356     if (!y->xtoy) { /* get xtoy */
2357       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2358       y->XtoY = X;
2359       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2360     }
2361     bs2 = bs*bs;
2362     for (i=0; i<x->nz; i++) {
2363       j = 0;
2364       while (j < bs2){
2365         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2366         j++;
2367       }
2368     }
2369     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2370   } else {
2371     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2372   }
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2378 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2379 {
2380   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2381   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2382   MatScalar      *aa = a->a;
2383 
2384   PetscFunctionBegin;
2385   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2391 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2392 {
2393   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2394   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2395   MatScalar      *aa = a->a;
2396 
2397   PetscFunctionBegin;
2398   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 
2403 /* -------------------------------------------------------------------*/
2404 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2405        MatGetRow_SeqBAIJ,
2406        MatRestoreRow_SeqBAIJ,
2407        MatMult_SeqBAIJ_N,
2408 /* 4*/ MatMultAdd_SeqBAIJ_N,
2409        MatMultTranspose_SeqBAIJ,
2410        MatMultTransposeAdd_SeqBAIJ,
2411        0,
2412        0,
2413        0,
2414 /*10*/ 0,
2415        MatLUFactor_SeqBAIJ,
2416        0,
2417        0,
2418        MatTranspose_SeqBAIJ,
2419 /*15*/ MatGetInfo_SeqBAIJ,
2420        MatEqual_SeqBAIJ,
2421        MatGetDiagonal_SeqBAIJ,
2422        MatDiagonalScale_SeqBAIJ,
2423        MatNorm_SeqBAIJ,
2424 /*20*/ 0,
2425        MatAssemblyEnd_SeqBAIJ,
2426        MatSetOption_SeqBAIJ,
2427        MatZeroEntries_SeqBAIJ,
2428 /*24*/ MatZeroRows_SeqBAIJ,
2429        0,
2430        0,
2431        0,
2432        0,
2433 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2434        0,
2435        0,
2436        MatGetArray_SeqBAIJ,
2437        MatRestoreArray_SeqBAIJ,
2438 /*34*/ MatDuplicate_SeqBAIJ,
2439        0,
2440        0,
2441        MatILUFactor_SeqBAIJ,
2442        0,
2443 /*39*/ MatAXPY_SeqBAIJ,
2444        MatGetSubMatrices_SeqBAIJ,
2445        MatIncreaseOverlap_SeqBAIJ,
2446        MatGetValues_SeqBAIJ,
2447        MatCopy_SeqBAIJ,
2448 /*44*/ 0,
2449        MatScale_SeqBAIJ,
2450        0,
2451        0,
2452        MatILUDTFactor_SeqBAIJ,
2453 /*49*/ 0,
2454        MatGetRowIJ_SeqBAIJ,
2455        MatRestoreRowIJ_SeqBAIJ,
2456        0,
2457        0,
2458 /*54*/ 0,
2459        0,
2460        0,
2461        0,
2462        MatSetValuesBlocked_SeqBAIJ,
2463 /*59*/ MatGetSubMatrix_SeqBAIJ,
2464        MatDestroy_SeqBAIJ,
2465        MatView_SeqBAIJ,
2466        0,
2467        0,
2468 /*64*/ 0,
2469        0,
2470        0,
2471        0,
2472        0,
2473 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2474        0,
2475        MatConvert_Basic,
2476        0,
2477        0,
2478 /*74*/ 0,
2479        0,
2480        0,
2481        0,
2482        0,
2483 /*79*/ 0,
2484        0,
2485        0,
2486        0,
2487        MatLoad_SeqBAIJ,
2488 /*84*/ 0,
2489        0,
2490        0,
2491        0,
2492        0,
2493 /*89*/ 0,
2494        0,
2495        0,
2496        0,
2497        0,
2498 /*94*/ 0,
2499        0,
2500        0,
2501        0,
2502        0,
2503 /*99*/0,
2504        0,
2505        0,
2506        0,
2507        0,
2508 /*104*/0,
2509        MatRealPart_SeqBAIJ,
2510        MatImaginaryPart_SeqBAIJ,
2511        0,
2512        0,
2513 /*109*/0,
2514        0,
2515        0,
2516        0,
2517        MatMissingDiagonal_SeqBAIJ
2518 /*114*/
2519 };
2520 
2521 EXTERN_C_BEGIN
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2524 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2525 {
2526   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2527   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   if (aij->nonew != 1) {
2532     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2533   }
2534 
2535   /* allocate space for values if not already there */
2536   if (!aij->saved_values) {
2537     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2538   }
2539 
2540   /* copy values over */
2541   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 EXTERN_C_END
2545 
2546 EXTERN_C_BEGIN
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2549 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2550 {
2551   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2552   PetscErrorCode ierr;
2553   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2554 
2555   PetscFunctionBegin;
2556   if (aij->nonew != 1) {
2557     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2558   }
2559   if (!aij->saved_values) {
2560     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2561   }
2562 
2563   /* copy values over */
2564   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 EXTERN_C_END
2568 
2569 EXTERN_C_BEGIN
2570 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2572 EXTERN_C_END
2573 
2574 EXTERN_C_BEGIN
2575 #undef __FUNCT__
2576 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2577 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2578 {
2579   Mat_SeqBAIJ    *b;
2580   PetscErrorCode ierr;
2581   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2582   PetscTruth     flg,skipallocation = PETSC_FALSE;
2583 
2584   PetscFunctionBegin;
2585 
2586   if (nz == MAT_SKIP_ALLOCATION) {
2587     skipallocation = PETSC_TRUE;
2588     nz             = 0;
2589   }
2590 
2591   if (bs < 0) {
2592     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2593       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2594     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2595     bs   = PetscAbs(bs);
2596   }
2597   if (nnz && newbs != bs) {
2598     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2599   }
2600   bs   = newbs;
2601 
2602   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2603   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2604   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2605   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2606 
2607   B->preallocated = PETSC_TRUE;
2608 
2609   mbs  = B->rmap->n/bs;
2610   nbs  = B->cmap->n/bs;
2611   bs2  = bs*bs;
2612 
2613   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2614     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2615   }
2616 
2617   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2618   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2619   if (nnz) {
2620     for (i=0; i<mbs; i++) {
2621       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2622       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2623     }
2624   }
2625 
2626   b       = (Mat_SeqBAIJ*)B->data;
2627   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2628     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2629   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2630 
2631   if (!flg) {
2632     switch (bs) {
2633     case 1:
2634       B->ops->mult            = MatMult_SeqBAIJ_1;
2635       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2636       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2637       break;
2638     case 2:
2639       B->ops->mult            = MatMult_SeqBAIJ_2;
2640       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2641       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2642       break;
2643     case 3:
2644       B->ops->mult            = MatMult_SeqBAIJ_3;
2645       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2646       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2647       break;
2648     case 4:
2649       B->ops->mult            = MatMult_SeqBAIJ_4;
2650       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2651       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2652       break;
2653     case 5:
2654       B->ops->mult            = MatMult_SeqBAIJ_5;
2655       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2656       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2657       break;
2658     case 6:
2659       B->ops->mult            = MatMult_SeqBAIJ_6;
2660       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2661       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2662       break;
2663     case 7:
2664       B->ops->mult            = MatMult_SeqBAIJ_7;
2665       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2666       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2667       break;
2668     default:
2669       B->ops->mult            = MatMult_SeqBAIJ_N;
2670       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2671       break;
2672     }
2673   }
2674   B->rmap->bs      = bs;
2675   b->mbs     = mbs;
2676   b->nbs     = nbs;
2677   if (!skipallocation) {
2678     if (!b->imax) {
2679       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2680     }
2681     /* b->ilen will count nonzeros in each block row so far. */
2682     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2683     if (!nnz) {
2684       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2685       else if (nz <= 0)        nz = 1;
2686       for (i=0; i<mbs; i++) b->imax[i] = nz;
2687       nz = nz*mbs;
2688     } else {
2689       nz = 0;
2690       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2691     }
2692 
2693     /* allocate the matrix space */
2694     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2695     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2696     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2697     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2698     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2699     b->singlemalloc = PETSC_TRUE;
2700     b->i[0] = 0;
2701     for (i=1; i<mbs+1; i++) {
2702       b->i[i] = b->i[i-1] + b->imax[i-1];
2703     }
2704     b->free_a     = PETSC_TRUE;
2705     b->free_ij    = PETSC_TRUE;
2706   } else {
2707     b->free_a     = PETSC_FALSE;
2708     b->free_ij    = PETSC_FALSE;
2709   }
2710 
2711   B->rmap->bs          = bs;
2712   b->bs2              = bs2;
2713   b->mbs              = mbs;
2714   b->nz               = 0;
2715   b->maxnz            = nz*bs2;
2716   B->info.nz_unneeded = (PetscReal)b->maxnz;
2717   PetscFunctionReturn(0);
2718 }
2719 EXTERN_C_END
2720 
2721 EXTERN_C_BEGIN
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2724 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2725 {
2726   PetscInt       i,m,nz,nz_max=0,*nnz;
2727   PetscScalar    *values=0;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731 
2732   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2733 
2734   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2735   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2736   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2737   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2738   m = B->rmap->n/bs;
2739 
2740   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2741   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2742   for(i=0; i<m; i++) {
2743     nz = ii[i+1]- ii[i];
2744     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2745     nz_max = PetscMax(nz_max, nz);
2746     nnz[i] = nz;
2747   }
2748   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2749   ierr = PetscFree(nnz);CHKERRQ(ierr);
2750 
2751   values = (PetscScalar*)V;
2752   if (!values) {
2753     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2754     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2755   }
2756   for (i=0; i<m; i++) {
2757     PetscInt          ncols  = ii[i+1] - ii[i];
2758     const PetscInt    *icols = jj + ii[i];
2759     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2760     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2761   }
2762   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2763   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2764   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2765 
2766   PetscFunctionReturn(0);
2767 }
2768 EXTERN_C_END
2769 
2770 
2771 EXTERN_C_BEGIN
2772 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2773 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2774 EXTERN_C_END
2775 
2776 /*MC
2777    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2778    block sparse compressed row format.
2779 
2780    Options Database Keys:
2781 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2782 
2783   Level: beginner
2784 
2785 .seealso: MatCreateSeqBAIJ()
2786 M*/
2787 
2788 
2789 EXTERN_C_BEGIN
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatCreate_SeqBAIJ"
2792 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2793 {
2794   PetscErrorCode ierr;
2795   PetscMPIInt    size;
2796   Mat_SeqBAIJ    *b;
2797 
2798   PetscFunctionBegin;
2799   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2800   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2801 
2802   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2803   B->data = (void*)b;
2804   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2805   B->mapping               = 0;
2806   b->row                   = 0;
2807   b->col                   = 0;
2808   b->icol                  = 0;
2809   b->reallocs              = 0;
2810   b->saved_values          = 0;
2811 
2812   b->roworiented           = PETSC_TRUE;
2813   b->nonew                 = 0;
2814   b->diag                  = 0;
2815   b->solve_work            = 0;
2816   b->mult_work             = 0;
2817   B->spptr                 = 0;
2818   B->info.nz_unneeded      = (PetscReal)b->maxnz;
2819   b->keepnonzeropattern    = PETSC_FALSE;
2820   b->xtoy                  = 0;
2821   b->XtoY                  = 0;
2822   b->compressedrow.use     = PETSC_FALSE;
2823   b->compressedrow.nrows   = 0;
2824   b->compressedrow.i       = PETSC_NULL;
2825   b->compressedrow.rindex  = PETSC_NULL;
2826   b->compressedrow.checked = PETSC_FALSE;
2827   B->same_nonzero          = PETSC_FALSE;
2828 
2829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2830                                      "MatGetFactorAvailable_seqbaij_petsc",
2831                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2833                                      "MatGetFactor_seqbaij_petsc",
2834                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2836                                      "MatInvertBlockDiagonal_SeqBAIJ",
2837                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2839                                      "MatStoreValues_SeqBAIJ",
2840                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2842                                      "MatRetrieveValues_SeqBAIJ",
2843                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2845                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2846                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2848                                      "MatConvert_SeqBAIJ_SeqAIJ",
2849                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2851                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2852                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2854                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2855                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2857                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2858                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2859   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 EXTERN_C_END
2863 
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2866 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2867 {
2868   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2869   PetscErrorCode ierr;
2870   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2871 
2872   PetscFunctionBegin;
2873   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2874 
2875   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2876   for (i=0; i<mbs; i++) {
2877     c->imax[i] = a->imax[i];
2878     c->ilen[i] = a->ilen[i];
2879   }
2880 
2881   /* allocate the matrix space */
2882   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2883   c->singlemalloc = PETSC_TRUE;
2884   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2885   if (mbs > 0) {
2886     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2887     if (cpvalues == MAT_COPY_VALUES) {
2888       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2889     } else {
2890       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2891     }
2892   }
2893   c->roworiented = a->roworiented;
2894   c->nonew       = a->nonew;
2895   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2896   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2897   c->bs2         = a->bs2;
2898   c->mbs         = a->mbs;
2899   c->nbs         = a->nbs;
2900 
2901   if (a->diag) {
2902     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2903     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2904     for (i=0; i<mbs; i++) {
2905       c->diag[i] = a->diag[i];
2906     }
2907   } else c->diag        = 0;
2908   c->nz                 = a->nz;
2909   c->maxnz              = a->maxnz;
2910   c->solve_work         = 0;
2911   c->mult_work          = 0;
2912   c->free_a             = PETSC_TRUE;
2913   c->free_ij            = PETSC_TRUE;
2914   C->preallocated       = PETSC_TRUE;
2915   C->assembled          = PETSC_TRUE;
2916 
2917   c->compressedrow.use     = a->compressedrow.use;
2918   c->compressedrow.nrows   = a->compressedrow.nrows;
2919   c->compressedrow.checked = a->compressedrow.checked;
2920   if ( a->compressedrow.checked && a->compressedrow.use){
2921     i = a->compressedrow.nrows;
2922     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2923     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2924     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2925     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2926   } else {
2927     c->compressedrow.use    = PETSC_FALSE;
2928     c->compressedrow.i      = PETSC_NULL;
2929     c->compressedrow.rindex = PETSC_NULL;
2930   }
2931   C->same_nonzero = A->same_nonzero;
2932   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 #undef __FUNCT__
2937 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2938 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2939 {
2940     PetscErrorCode ierr;
2941 
2942   PetscFunctionBegin;
2943   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2944   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2945   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2946   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatLoad_SeqBAIJ"
2952 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2953 {
2954   Mat_SeqBAIJ    *a;
2955   Mat            B;
2956   PetscErrorCode ierr;
2957   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2958   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2959   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2960   PetscInt       *masked,nmask,tmp,bs2,ishift;
2961   PetscMPIInt    size;
2962   int            fd;
2963   PetscScalar    *aa;
2964   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2965 
2966   PetscFunctionBegin;
2967   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2968     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2969   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2970   bs2  = bs*bs;
2971 
2972   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2973   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2974   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2975   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2976   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2977   M = header[1]; N = header[2]; nz = header[3];
2978 
2979   if (header[3] < 0) {
2980     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2981   }
2982 
2983   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2984 
2985   /*
2986      This code adds extra rows to make sure the number of rows is
2987     divisible by the blocksize
2988   */
2989   mbs        = M/bs;
2990   extra_rows = bs - M + bs*(mbs);
2991   if (extra_rows == bs) extra_rows = 0;
2992   else                  mbs++;
2993   if (extra_rows) {
2994     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2995   }
2996 
2997   /* read in row lengths */
2998   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2999   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3000   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3001 
3002   /* read in column indices */
3003   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3004   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3005   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3006 
3007   /* loop over row lengths determining block row lengths */
3008   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3009   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3010   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3011   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3012   masked   = mask + mbs;
3013   rowcount = 0; nzcount = 0;
3014   for (i=0; i<mbs; i++) {
3015     nmask = 0;
3016     for (j=0; j<bs; j++) {
3017       kmax = rowlengths[rowcount];
3018       for (k=0; k<kmax; k++) {
3019         tmp = jj[nzcount++]/bs;
3020         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3021       }
3022       rowcount++;
3023     }
3024     browlengths[i] += nmask;
3025     /* zero out the mask elements we set */
3026     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3027   }
3028 
3029   /* create our matrix */
3030   ierr = MatCreate(comm,&B);
3031   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3032   ierr = MatSetType(B,type);CHKERRQ(ierr);
3033   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3034   a = (Mat_SeqBAIJ*)B->data;
3035 
3036   /* set matrix "i" values */
3037   a->i[0] = 0;
3038   for (i=1; i<= mbs; i++) {
3039     a->i[i]      = a->i[i-1] + browlengths[i-1];
3040     a->ilen[i-1] = browlengths[i-1];
3041   }
3042   a->nz         = 0;
3043   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3044 
3045   /* read in nonzero values */
3046   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3047   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3048   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3049 
3050   /* set "a" and "j" values into matrix */
3051   nzcount = 0; jcount = 0;
3052   for (i=0; i<mbs; i++) {
3053     nzcountb = nzcount;
3054     nmask    = 0;
3055     for (j=0; j<bs; j++) {
3056       kmax = rowlengths[i*bs+j];
3057       for (k=0; k<kmax; k++) {
3058         tmp = jj[nzcount++]/bs;
3059 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3060       }
3061     }
3062     /* sort the masked values */
3063     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3064 
3065     /* set "j" values into matrix */
3066     maskcount = 1;
3067     for (j=0; j<nmask; j++) {
3068       a->j[jcount++]  = masked[j];
3069       mask[masked[j]] = maskcount++;
3070     }
3071     /* set "a" values into matrix */
3072     ishift = bs2*a->i[i];
3073     for (j=0; j<bs; j++) {
3074       kmax = rowlengths[i*bs+j];
3075       for (k=0; k<kmax; k++) {
3076         tmp       = jj[nzcountb]/bs ;
3077         block     = mask[tmp] - 1;
3078         point     = jj[nzcountb] - bs*tmp;
3079         idx       = ishift + bs2*block + j + bs*point;
3080         a->a[idx] = (MatScalar)aa[nzcountb++];
3081       }
3082     }
3083     /* zero out the mask elements we set */
3084     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3085   }
3086   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3087 
3088   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3089   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3090   ierr = PetscFree(aa);CHKERRQ(ierr);
3091   ierr = PetscFree(jj);CHKERRQ(ierr);
3092   ierr = PetscFree(mask);CHKERRQ(ierr);
3093 
3094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3096   ierr = MatView_Private(B);CHKERRQ(ierr);
3097 
3098   *A = B;
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 #undef __FUNCT__
3103 #define __FUNCT__ "MatCreateSeqBAIJ"
3104 /*@C
3105    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3106    compressed row) format.  For good matrix assembly performance the
3107    user should preallocate the matrix storage by setting the parameter nz
3108    (or the array nnz).  By setting these parameters accurately, performance
3109    during matrix assembly can be increased by more than a factor of 50.
3110 
3111    Collective on MPI_Comm
3112 
3113    Input Parameters:
3114 +  comm - MPI communicator, set to PETSC_COMM_SELF
3115 .  bs - size of block
3116 .  m - number of rows
3117 .  n - number of columns
3118 .  nz - number of nonzero blocks  per block row (same for all rows)
3119 -  nnz - array containing the number of nonzero blocks in the various block rows
3120          (possibly different for each block row) or PETSC_NULL
3121 
3122    Output Parameter:
3123 .  A - the matrix
3124 
3125    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3126    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3127    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3128 
3129    Options Database Keys:
3130 .   -mat_no_unroll - uses code that does not unroll the loops in the
3131                      block calculations (much slower)
3132 .    -mat_block_size - size of the blocks to use
3133 
3134    Level: intermediate
3135 
3136    Notes:
3137    The number of rows and columns must be divisible by blocksize.
3138 
3139    If the nnz parameter is given then the nz parameter is ignored
3140 
3141    A nonzero block is any block that as 1 or more nonzeros in it
3142 
3143    The block AIJ format is fully compatible with standard Fortran 77
3144    storage.  That is, the stored row and column indices can begin at
3145    either one (as in Fortran) or zero.  See the users' manual for details.
3146 
3147    Specify the preallocated storage with either nz or nnz (not both).
3148    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3149    allocation.  For additional details, see the users manual chapter on
3150    matrices.
3151 
3152 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3153 @*/
3154 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3155 {
3156   PetscErrorCode ierr;
3157 
3158   PetscFunctionBegin;
3159   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3160   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3161   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3162   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 #undef __FUNCT__
3167 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3168 /*@C
3169    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3170    per row in the matrix. For good matrix assembly performance the
3171    user should preallocate the matrix storage by setting the parameter nz
3172    (or the array nnz).  By setting these parameters accurately, performance
3173    during matrix assembly can be increased by more than a factor of 50.
3174 
3175    Collective on MPI_Comm
3176 
3177    Input Parameters:
3178 +  A - the matrix
3179 .  bs - size of block
3180 .  nz - number of block nonzeros per block row (same for all rows)
3181 -  nnz - array containing the number of block nonzeros in the various block rows
3182          (possibly different for each block row) or PETSC_NULL
3183 
3184    Options Database Keys:
3185 .   -mat_no_unroll - uses code that does not unroll the loops in the
3186                      block calculations (much slower)
3187 .    -mat_block_size - size of the blocks to use
3188 
3189    Level: intermediate
3190 
3191    Notes:
3192    If the nnz parameter is given then the nz parameter is ignored
3193 
3194    You can call MatGetInfo() to get information on how effective the preallocation was;
3195    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3196    You can also run with the option -info and look for messages with the string
3197    malloc in them to see if additional memory allocation was needed.
3198 
3199    The block AIJ format is fully compatible with standard Fortran 77
3200    storage.  That is, the stored row and column indices can begin at
3201    either one (as in Fortran) or zero.  See the users' manual for details.
3202 
3203    Specify the preallocated storage with either nz or nnz (not both).
3204    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3205    allocation.  For additional details, see the users manual chapter on
3206    matrices.
3207 
3208 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3209 @*/
3210 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3211 {
3212   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3213 
3214   PetscFunctionBegin;
3215   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3216   if (f) {
3217     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3218   }
3219   PetscFunctionReturn(0);
3220 }
3221 
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3224 /*@C
3225    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3226    (the default sequential PETSc format).
3227 
3228    Collective on MPI_Comm
3229 
3230    Input Parameters:
3231 +  A - the matrix
3232 .  i - the indices into j for the start of each local row (starts with zero)
3233 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3234 -  v - optional values in the matrix
3235 
3236    Level: developer
3237 
3238 .keywords: matrix, aij, compressed row, sparse
3239 
3240 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3241 @*/
3242 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3243 {
3244   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3245 
3246   PetscFunctionBegin;
3247   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3248   if (f) {
3249     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3250   }
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3257 /*@
3258      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3259               (upper triangular entries in CSR format) provided by the user.
3260 
3261      Collective on MPI_Comm
3262 
3263    Input Parameters:
3264 +  comm - must be an MPI communicator of size 1
3265 .  bs - size of block
3266 .  m - number of rows
3267 .  n - number of columns
3268 .  i - row indices
3269 .  j - column indices
3270 -  a - matrix values
3271 
3272    Output Parameter:
3273 .  mat - the matrix
3274 
3275    Level: intermediate
3276 
3277    Notes:
3278        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3279     once the matrix is destroyed
3280 
3281        You cannot set new nonzero locations into this matrix, that will generate an error.
3282 
3283        The i and j indices are 0 based
3284 
3285 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3286 
3287 @*/
3288 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3289 {
3290   PetscErrorCode ierr;
3291   PetscInt       ii;
3292   Mat_SeqBAIJ    *baij;
3293 
3294   PetscFunctionBegin;
3295   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3296   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3297 
3298   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3299   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3300   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3301   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3302   baij = (Mat_SeqBAIJ*)(*mat)->data;
3303   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3304 
3305   baij->i = i;
3306   baij->j = j;
3307   baij->a = a;
3308   baij->singlemalloc = PETSC_FALSE;
3309   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3310   baij->free_a       = PETSC_FALSE;
3311   baij->free_ij       = PETSC_FALSE;
3312 
3313   for (ii=0; ii<m; ii++) {
3314     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3315 #if defined(PETSC_USE_DEBUG)
3316     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3317 #endif
3318   }
3319 #if defined(PETSC_USE_DEBUG)
3320   for (ii=0; ii<baij->i[m]; ii++) {
3321     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3322     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3323   }
3324 #endif
3325 
3326   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   PetscFunctionReturn(0);
3329 }
3330