xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7842634f46ad2ee292fa135afd07ef040c80768f)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
49   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25];
50   PetscReal      shift = 0.0;
51 
52   PetscFunctionBegin;
53   if (a->idiagvalid) PetscFunctionReturn(0);
54   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
55   diag_offset = a->diag;
56   if (!a->idiag) {
57     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
58     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
138 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,j;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         for (j=0; j<nz; j++) {
175           s1 -= v[j]*x[vi[j]];
176         }
177         x[i2]   = idiag[0]*s1;
178         idiag   += 1;
179         i2      += 1;
180       }
181       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
182       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
183     }
184     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
185         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
186       i2    = 0;
187       mdiag = a->idiag+a->mbs;
188       for (i=0; i<m; i++) {
189         x1      = x[i2];
190         x[i2]   = mdiag[0]*x1;
191         mdiag  += 1;
192         i2     += 1;
193       }
194       ierr = PetscLogFlops(m);CHKERRQ(ierr);
195     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
196       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
197     }
198     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
199       idiag   = a->idiag+a->mbs - 1;
200       i2      = m - 1;
201       x1      = x[i2];
202       x[i2]   = idiag[0]*x1;
203       idiag -= 1;
204       i2    -= 1;
205       for (i=m-2; i>=0; i--) {
206         v     = aa + (diag[i]+1);
207         vi    = aj + diag[i] + 1;
208         nz    = ai[i+1] - diag[i] - 1;
209         s1    = x[i2];
210         for (j=0; j<nz; j++) {
211           s1 -= v[j]*x[vi[j]];
212         }
213         x[i2]   = idiag[0]*s1;
214         idiag   -= 1;
215         i2      -= 1;
216       }
217       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
218     }
219   } else {
220     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
221   }
222   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
223   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 #undef __FUNCT__
228 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
229 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
230 {
231   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
232   PetscScalar        *x,x1,x2,s1,s2;
233   const PetscScalar  *b;
234   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
235   PetscErrorCode     ierr;
236   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
237   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
238 
239   PetscFunctionBegin;
240   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
241   its = its*lits;
242   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
243   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
244   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
245   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
246   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
247 
248   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
249 
250   diag  = a->diag;
251   idiag = a->idiag;
252   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
253   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
254 
255   if (flag & SOR_ZERO_INITIAL_GUESS) {
256     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
257       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
258       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
259       i2     = 2;
260       idiag += 4;
261       for (i=1; i<m; i++) {
262 	v     = aa + 4*ai[i];
263 	vi    = aj + ai[i];
264 	nz    = diag[i] - ai[i];
265 	s1    = b[i2]; s2 = b[i2+1];
266         for (j=0; j<nz; j++) {
267 	  idx  = 2*vi[j];
268           it   = 4*j;
269 	  x1   = x[idx]; x2 = x[1+idx];
270 	  s1  -= v[it]*x1 + v[it+2]*x2;
271 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
272 	}
273 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
274 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
275         idiag   += 4;
276         i2      += 2;
277       }
278       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
279       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
280     }
281     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
282         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
283       i2    = 0;
284       mdiag = a->idiag+4*a->mbs;
285       for (i=0; i<m; i++) {
286         x1      = x[i2]; x2 = x[i2+1];
287         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
288         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
289         mdiag  += 4;
290         i2     += 2;
291       }
292       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
293     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
294       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
295     }
296     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
297       idiag   = a->idiag+4*a->mbs - 4;
298       i2      = 2*m - 2;
299       x1      = x[i2]; x2 = x[i2+1];
300       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
301       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
302       idiag -= 4;
303       i2    -= 2;
304       for (i=m-2; i>=0; i--) {
305 	v     = aa + 4*(diag[i]+1);
306 	vi    = aj + diag[i] + 1;
307 	nz    = ai[i+1] - diag[i] - 1;
308 	s1    = x[i2]; s2 = x[i2+1];
309         for (j=0; j<nz; j++) {
310  	  idx  = 2*vi[j];
311           it   = 4*j;
312 	  x1   = x[idx]; x2 = x[1+idx];
313 	  s1  -= v[it]*x1 + v[it+2]*x2;
314 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
315 	}
316 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
317 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
318         idiag   -= 4;
319         i2      -= 2;
320       }
321       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
322     }
323   } else {
324     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
325   }
326   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
327   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
333 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
334 {
335   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
336   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
337   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
338   const PetscScalar  *b;
339   PetscErrorCode     ierr;
340   PetscInt           m = a->mbs,i,i2,nz,idx;
341   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
342 
343   PetscFunctionBegin;
344   its = its*lits;
345   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
346   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
347   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
348   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
349   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
350   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
351 
352   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
353 
354   diag  = a->diag;
355   idiag = a->idiag;
356   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
357   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
358 
359   if (flag & SOR_ZERO_INITIAL_GUESS) {
360     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
361       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
362       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
363       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
364       i2     = 3;
365       idiag += 9;
366       for (i=1; i<m; i++) {
367         v     = aa + 9*ai[i];
368         vi    = aj + ai[i];
369         nz    = diag[i] - ai[i];
370         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
371         while (nz--) {
372           idx  = 3*(*vi++);
373           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
374           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
375           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
376           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
377           v   += 9;
378         }
379         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
380         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
381         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
382         idiag   += 9;
383         i2      += 3;
384       }
385       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
387     }
388     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390       i2    = 0;
391       mdiag = a->idiag+9*a->mbs;
392       for (i=0; i<m; i++) {
393         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
394         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
395         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
396         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
397         mdiag  += 9;
398         i2     += 3;
399       }
400       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
401     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
402       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
403     }
404     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
405       idiag   = a->idiag+9*a->mbs - 9;
406       i2      = 3*m - 3;
407       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
408       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
409       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
410       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
411       idiag -= 9;
412       i2    -= 3;
413       for (i=m-2; i>=0; i--) {
414         v     = aa + 9*(diag[i]+1);
415         vi    = aj + diag[i] + 1;
416         nz    = ai[i+1] - diag[i] - 1;
417         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
418         while (nz--) {
419           idx  = 3*(*vi++);
420           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
421           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
422           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
423           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
424           v   += 9;
425         }
426         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
427         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
428         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
429         idiag   -= 9;
430         i2      -= 3;
431       }
432       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
433     }
434   } else {
435     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
436   }
437   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
438   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
444 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
445 {
446   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
447   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
448   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
449   const PetscScalar  *b;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
461   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
476       i2     = 4;
477       idiag += 16;
478       for (i=1; i<m; i++) {
479 	v     = aa + 16*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
483 	while (nz--) {
484 	  idx  = 4*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
486 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
487 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
488 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
489 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
490 	  v   += 16;
491 	}
492 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
493 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
494 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
495 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
496         idiag   += 16;
497         i2      += 4;
498       }
499       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
500       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
501     }
502     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
503         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
504       i2    = 0;
505       mdiag = a->idiag+16*a->mbs;
506       for (i=0; i<m; i++) {
507         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
509         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
510         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
511         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
512         mdiag  += 16;
513         i2     += 4;
514       }
515       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
516     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
517       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
518     }
519     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
520       idiag   = a->idiag+16*a->mbs - 16;
521       i2      = 4*m - 4;
522       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
523       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
524       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
525       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
526       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
527       idiag -= 16;
528       i2    -= 4;
529       for (i=m-2; i>=0; i--) {
530 	v     = aa + 16*(diag[i]+1);
531 	vi    = aj + diag[i] + 1;
532 	nz    = ai[i+1] - diag[i] - 1;
533 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
534 	while (nz--) {
535 	  idx  = 4*(*vi++);
536 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
537 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
538 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
539 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
540 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
541 	  v   += 16;
542 	}
543 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
544 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
545 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
546 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
547         idiag   -= 16;
548         i2      -= 4;
549       }
550       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
551     }
552   } else {
553     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
554   }
555   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
556   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
562 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
563 {
564   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
565   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
566   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
567   const PetscScalar  *b;
568   PetscErrorCode     ierr;
569   PetscInt           m = a->mbs,i,i2,nz,idx;
570   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
571 
572   PetscFunctionBegin;
573   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
574   its = its*lits;
575   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
576   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
577   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
578   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
579   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
580 
581   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
582 
583   diag  = a->diag;
584   idiag = a->idiag;
585   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
586   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
587 
588   if (flag & SOR_ZERO_INITIAL_GUESS) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
590       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
591       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
592       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
593       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
594       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
595       i2     = 5;
596       idiag += 25;
597       for (i=1; i<m; i++) {
598 	v     = aa + 25*ai[i];
599 	vi    = aj + ai[i];
600 	nz    = diag[i] - ai[i];
601 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
602 	while (nz--) {
603 	  idx  = 5*(*vi++);
604 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
605 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
606 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
607 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
608 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
609 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
610 	  v   += 25;
611 	}
612 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
613 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
614 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
615 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
616 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
617         idiag   += 25;
618         i2      += 5;
619       }
620       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
621       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
622     }
623     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
624         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
625       i2    = 0;
626       mdiag = a->idiag+25*a->mbs;
627       for (i=0; i<m; i++) {
628         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
629         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
630         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
631         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
632         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
633         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
634         mdiag  += 25;
635         i2     += 5;
636       }
637       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
638     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
639       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
640     }
641     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
642       idiag   = a->idiag+25*a->mbs - 25;
643       i2      = 5*m - 5;
644       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
645       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
646       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
647       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
648       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
649       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
650       idiag -= 25;
651       i2    -= 5;
652       for (i=m-2; i>=0; i--) {
653 	v     = aa + 25*(diag[i]+1);
654 	vi    = aj + diag[i] + 1;
655 	nz    = ai[i+1] - diag[i] - 1;
656 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
657 	while (nz--) {
658 	  idx  = 5*(*vi++);
659 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
660 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
661 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
662 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
663 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
664 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
665 	  v   += 25;
666 	}
667 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
668 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
669 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
670 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
671 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
672         idiag   -= 25;
673         i2      -= 5;
674       }
675       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
676     }
677   } else {
678     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
679   }
680   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
681   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
687 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
688 {
689   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
690   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
691   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
692   const PetscScalar  *b;
693   PetscErrorCode     ierr;
694   PetscInt           m = a->mbs,i,i2,nz,idx;
695   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
696 
697   PetscFunctionBegin;
698   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
699   its = its*lits;
700   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
701   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
702   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
703   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
704   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
705 
706   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
707 
708   diag  = a->diag;
709   idiag = a->idiag;
710   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
711   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
712 
713   if (flag & SOR_ZERO_INITIAL_GUESS) {
714     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
715       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
716       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
717       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
718       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
719       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
720       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
721       i2     = 6;
722       idiag += 36;
723       for (i=1; i<m; i++) {
724         v     = aa + 36*ai[i];
725         vi    = aj + ai[i];
726         nz    = diag[i] - ai[i];
727         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
728         while (nz--) {
729           idx  = 6*(*vi++);
730           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
731           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
732           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
733           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
734           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
735           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
736           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
737           v   += 36;
738         }
739         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
740         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
741         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
742         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
743         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
744         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
745         idiag   += 36;
746         i2      += 6;
747       }
748       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
749       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
750     }
751     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
752         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
753       i2    = 0;
754       mdiag = a->idiag+36*a->mbs;
755       for (i=0; i<m; i++) {
756         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
757         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
758         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
759         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
760         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
761         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
762         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
763         mdiag  += 36;
764         i2     += 6;
765       }
766       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
767     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
768       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
769     }
770     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
771       idiag   = a->idiag+36*a->mbs - 36;
772       i2      = 6*m - 6;
773       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
774       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
775       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
776       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
777       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
778       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
779       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
780       idiag -= 36;
781       i2    -= 6;
782       for (i=m-2; i>=0; i--) {
783         v     = aa + 36*(diag[i]+1);
784         vi    = aj + diag[i] + 1;
785         nz    = ai[i+1] - diag[i] - 1;
786         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
787         while (nz--) {
788           idx  = 6*(*vi++);
789           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
790           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
791           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
792           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
793           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
794           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
795           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
796           v   += 36;
797         }
798         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
799         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
800         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
801         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
802         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
803         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
804         idiag   -= 36;
805         i2      -= 6;
806       }
807       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
808     }
809   } else {
810     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
811   }
812   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
813   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
819 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
820 {
821   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
822   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
823   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
824   const PetscScalar  *b;
825   PetscErrorCode     ierr;
826   PetscInt           m = a->mbs,i,i2,nz,idx;
827   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
828 
829   PetscFunctionBegin;
830   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
831   its = its*lits;
832   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
833   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
834   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
835   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
836   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
837 
838   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
839 
840   diag  = a->diag;
841   idiag = a->idiag;
842   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
843   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
844 
845   if (flag & SOR_ZERO_INITIAL_GUESS) {
846     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
847       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
848       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
849       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
850       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
851       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
852       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
853       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
854       i2     = 7;
855       idiag += 49;
856       for (i=1; i<m; i++) {
857         v     = aa + 49*ai[i];
858         vi    = aj + ai[i];
859         nz    = diag[i] - ai[i];
860         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
861         while (nz--) {
862           idx  = 7*(*vi++);
863           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
864           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
865           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
866           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
867           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
868           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
869           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
870           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
871           v   += 49;
872         }
873         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
874         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
875         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
876         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
877         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
878         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
879         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
880         idiag   += 49;
881         i2      += 7;
882       }
883       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
884       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
885     }
886     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
887         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
888       i2    = 0;
889       mdiag = a->idiag+49*a->mbs;
890       for (i=0; i<m; i++) {
891         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
892         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
893         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
894         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
895         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
896         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
897         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
898         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
899         mdiag  += 36;
900         i2     += 6;
901       }
902       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
903     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
905     }
906     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
907       idiag   = a->idiag+49*a->mbs - 49;
908       i2      = 7*m - 7;
909       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
910       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
911       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
912       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
913       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
914       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
915       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
916       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
917       idiag -= 49;
918       i2    -= 7;
919       for (i=m-2; i>=0; i--) {
920         v     = aa + 49*(diag[i]+1);
921         vi    = aj + diag[i] + 1;
922         nz    = ai[i+1] - diag[i] - 1;
923         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
924         while (nz--) {
925           idx  = 7*(*vi++);
926           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
927           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
928           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
929           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
930           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
931           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
932           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
933           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
934           v   += 49;
935         }
936         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
937         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
938         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
939         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
940         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
941         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
942         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
943         idiag   -= 49;
944         i2      -= 7;
945       }
946       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
947     }
948   } else {
949     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
950   }
951   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
952   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957     Special version for direct calls from Fortran (Used in PETSc-fun3d)
958 */
959 #if defined(PETSC_HAVE_FORTRAN_CAPS)
960 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
961 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
962 #define matsetvaluesblocked4_ matsetvaluesblocked4
963 #endif
964 
965 EXTERN_C_BEGIN
966 #undef __FUNCT__
967 #define __FUNCT__ "matsetvaluesblocked4_"
968 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
969 {
970   Mat               A = *AA;
971   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
972   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
973   PetscInt          *ai=a->i,*ailen=a->ilen;
974   PetscInt          *aj=a->j,stepval,lastcol = -1;
975   const PetscScalar *value = v;
976   MatScalar         *ap,*aa = a->a,*bap;
977 
978   PetscFunctionBegin;
979   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980   stepval = (n-1)*4;
981   for (k=0; k<m; k++) { /* loop over added rows */
982     row  = im[k];
983     rp   = aj + ai[row];
984     ap   = aa + 16*ai[row];
985     nrow = ailen[row];
986     low  = 0;
987     high = nrow;
988     for (l=0; l<n; l++) { /* loop over added columns */
989       col = in[l];
990       if (col <= lastcol) low = 0; else high = nrow;
991       lastcol = col;
992       value = v + k*(stepval+4 + l)*4;
993       while (high-low > 7) {
994         t = (low+high)/2;
995         if (rp[t] > col) high = t;
996         else             low  = t;
997       }
998       for (i=low; i<high; i++) {
999         if (rp[i] > col) break;
1000         if (rp[i] == col) {
1001           bap  = ap +  16*i;
1002           for (ii=0; ii<4; ii++,value+=stepval) {
1003             for (jj=ii; jj<16; jj+=4) {
1004               bap[jj] += *value++;
1005             }
1006           }
1007           goto noinsert2;
1008         }
1009       }
1010       N = nrow++ - 1;
1011       high++; /* added new column index thus must search to one higher than before */
1012       /* shift up all the later entries in this row */
1013       for (ii=N; ii>=i; ii--) {
1014         rp[ii+1] = rp[ii];
1015         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1016       }
1017       if (N >= i) {
1018         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1019       }
1020       rp[i] = col;
1021       bap   = ap +  16*i;
1022       for (ii=0; ii<4; ii++,value+=stepval) {
1023         for (jj=ii; jj<16; jj+=4) {
1024           bap[jj] = *value++;
1025         }
1026       }
1027       noinsert2:;
1028       low = i;
1029     }
1030     ailen[row] = nrow;
1031   }
1032   PetscFunctionReturnVoid();
1033 }
1034 EXTERN_C_END
1035 
1036 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037 #define matsetvalues4_ MATSETVALUES4
1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039 #define matsetvalues4_ matsetvalues4
1040 #endif
1041 
1042 EXTERN_C_BEGIN
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatSetValues4_"
1045 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1046 {
1047   Mat         A = *AA;
1048   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1049   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1050   PetscInt    *ai=a->i,*ailen=a->ilen;
1051   PetscInt    *aj=a->j,brow,bcol;
1052   PetscInt    ridx,cidx,lastcol = -1;
1053   MatScalar   *ap,value,*aa=a->a,*bap;
1054 
1055   PetscFunctionBegin;
1056   for (k=0; k<m; k++) { /* loop over added rows */
1057     row  = im[k]; brow = row/4;
1058     rp   = aj + ai[brow];
1059     ap   = aa + 16*ai[brow];
1060     nrow = ailen[brow];
1061     low  = 0;
1062     high = nrow;
1063     for (l=0; l<n; l++) { /* loop over added columns */
1064       col = in[l]; bcol = col/4;
1065       ridx = row % 4; cidx = col % 4;
1066       value = v[l + k*n];
1067       if (col <= lastcol) low = 0; else high = nrow;
1068       lastcol = col;
1069       while (high-low > 7) {
1070         t = (low+high)/2;
1071         if (rp[t] > bcol) high = t;
1072         else              low  = t;
1073       }
1074       for (i=low; i<high; i++) {
1075         if (rp[i] > bcol) break;
1076         if (rp[i] == bcol) {
1077           bap  = ap +  16*i + 4*cidx + ridx;
1078           *bap += value;
1079           goto noinsert1;
1080         }
1081       }
1082       N = nrow++ - 1;
1083       high++; /* added new column thus must search to one higher than before */
1084       /* shift up all the later entries in this row */
1085       for (ii=N; ii>=i; ii--) {
1086         rp[ii+1] = rp[ii];
1087         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1088       }
1089       if (N>=i) {
1090         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1091       }
1092       rp[i]                    = bcol;
1093       ap[16*i + 4*cidx + ridx] = value;
1094       noinsert1:;
1095       low = i;
1096     }
1097     ailen[brow] = nrow;
1098   }
1099   PetscFunctionReturnVoid();
1100 }
1101 EXTERN_C_END
1102 
1103 /*
1104      Checks for missing diagonals
1105 */
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1108 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1109 {
1110   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1111   PetscErrorCode ierr;
1112   PetscInt       *diag,*jj = a->j,i;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1116   *missing = PETSC_FALSE;
1117   if (A->rmap->n > 0 && !jj) {
1118     *missing  = PETSC_TRUE;
1119     if (d) *d = 0;
1120     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1121   } else {
1122     diag     = a->diag;
1123     for (i=0; i<a->mbs; i++) {
1124       if (jj[diag[i]] != i) {
1125         *missing  = PETSC_TRUE;
1126         if (d) *d = i;
1127         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1128       }
1129     }
1130   }
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1136 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1137 {
1138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1139   PetscErrorCode ierr;
1140   PetscInt       i,j,m = a->mbs;
1141 
1142   PetscFunctionBegin;
1143   if (!a->diag) {
1144     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1145     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1146     a->free_diag = PETSC_TRUE;
1147   }
1148   for (i=0; i<m; i++) {
1149     a->diag[i] = a->i[i+1];
1150     for (j=a->i[i]; j<a->i[i+1]; j++) {
1151       if (a->j[j] == i) {
1152         a->diag[i] = j;
1153         break;
1154       }
1155     }
1156   }
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 
1161 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1165 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1166 {
1167   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1168   PetscErrorCode ierr;
1169   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1170   PetscInt       *tia, *tja;
1171 
1172   PetscFunctionBegin;
1173   *nn = n;
1174   if (!ia) PetscFunctionReturn(0);
1175   if (symmetric) {
1176     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1177     nz   = tia[n];
1178   } else {
1179     tia = a->i; tja = a->j;
1180   }
1181 
1182   if (!blockcompressed && bs > 1) {
1183     (*nn) *= bs;
1184     /* malloc & create the natural set of indices */
1185     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1186     if (n) {
1187       (*ia)[0] = 0;
1188       for (j=1; j<bs; j++) {
1189         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1190       }
1191     }
1192 
1193     for (i=1; i<n; i++) {
1194       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1195       for (j=1; j<bs; j++) {
1196         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1197       }
1198     }
1199     if (n) {
1200       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1201     }
1202 
1203     if (ja) {
1204       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1205       cnt = 0;
1206       for (i=0; i<n; i++) {
1207         for (j=0; j<bs; j++) {
1208           for (k=tia[i]; k<tia[i+1]; k++) {
1209             for (l=0; l<bs; l++) {
1210               (*ja)[cnt++] = bs*tja[k] + l;
1211 	    }
1212           }
1213         }
1214       }
1215     }
1216 
1217     n     *= bs;
1218     nz *= bs*bs;
1219     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1220       ierr = PetscFree(tia);CHKERRQ(ierr);
1221       ierr = PetscFree(tja);CHKERRQ(ierr);
1222     }
1223   } else if (oshift == 1) {
1224     if (symmetric) {
1225       PetscInt nz = tia[A->rmap->n/bs];
1226       /*  add 1 to i and j indices */
1227       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1228       *ia = tia;
1229       if (ja) {
1230 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1231         *ja = tja;
1232       }
1233     } else {
1234       PetscInt nz = a->i[A->rmap->n/bs];
1235       /* malloc space and  add 1 to i and j indices */
1236       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1237       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1238       if (ja) {
1239 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1240 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1241       }
1242     }
1243   } else {
1244     *ia = tia;
1245     if (ja) *ja = tja;
1246   }
1247 
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1254 {
1255   PetscErrorCode ierr;
1256 
1257   PetscFunctionBegin;
1258   if (!ia) PetscFunctionReturn(0);
1259   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1260     ierr = PetscFree(*ia);CHKERRQ(ierr);
1261     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1269 {
1270   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1276 #endif
1277   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1278   if (a->row) {
1279     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1280   }
1281   if (a->col) {
1282     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1283   }
1284   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1285   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1286   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1287   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1288   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1289   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1290   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1291   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1292   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1293 
1294   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1295   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1296   ierr = PetscFree(a);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1313 {
1314   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   switch (op) {
1319   case MAT_ROW_ORIENTED:
1320     a->roworiented    = flg;
1321     break;
1322   case MAT_KEEP_NONZERO_PATTERN:
1323     a->keepnonzeropattern = flg;
1324     break;
1325   case MAT_NEW_NONZERO_LOCATIONS:
1326     a->nonew          = (flg ? 0 : 1);
1327     break;
1328   case MAT_NEW_NONZERO_LOCATION_ERR:
1329     a->nonew          = (flg ? -1 : 0);
1330     break;
1331   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1332     a->nonew          = (flg ? -2 : 0);
1333     break;
1334   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1335     a->nounused       = (flg ? -1 : 0);
1336     break;
1337   case MAT_NEW_DIAGONALS:
1338   case MAT_IGNORE_OFF_PROC_ENTRIES:
1339   case MAT_USE_HASH_TABLE:
1340     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1341     break;
1342   case MAT_SYMMETRIC:
1343   case MAT_STRUCTURALLY_SYMMETRIC:
1344   case MAT_HERMITIAN:
1345   case MAT_SYMMETRY_ETERNAL:
1346     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1347     break;
1348   default:
1349     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1357 {
1358   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1361   MatScalar      *aa,*aa_i;
1362   PetscScalar    *v_i;
1363 
1364   PetscFunctionBegin;
1365   bs  = A->rmap->bs;
1366   ai  = a->i;
1367   aj  = a->j;
1368   aa  = a->a;
1369   bs2 = a->bs2;
1370 
1371   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1372 
1373   bn  = row/bs;   /* Block number */
1374   bp  = row % bs; /* Block Position */
1375   M   = ai[bn+1] - ai[bn];
1376   *nz = bs*M;
1377 
1378   if (v) {
1379     *v = 0;
1380     if (*nz) {
1381       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1382       for (i=0; i<M; i++) { /* for each block in the block row */
1383         v_i  = *v + i*bs;
1384         aa_i = aa + bs2*(ai[bn] + i);
1385         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1386       }
1387     }
1388   }
1389 
1390   if (idx) {
1391     *idx = 0;
1392     if (*nz) {
1393       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1394       for (i=0; i<M; i++) { /* for each block in the block row */
1395         idx_i = *idx + i*bs;
1396         itmp  = bs*aj[ai[bn] + i];
1397         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1398       }
1399     }
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1407 {
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1412   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1421 {
1422   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1423   Mat            C;
1424   PetscErrorCode ierr;
1425   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1426   PetscInt       *rows,*cols,bs2=a->bs2;
1427   MatScalar      *array;
1428 
1429   PetscFunctionBegin;
1430   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1431   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1432     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1433     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1434 
1435     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1436     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1437     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1438     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1439     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1440     ierr = PetscFree(col);CHKERRQ(ierr);
1441   } else {
1442     C = *B;
1443   }
1444 
1445   array = a->a;
1446   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1447   for (i=0; i<mbs; i++) {
1448     cols[0] = i*bs;
1449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1450     len = ai[i+1] - ai[i];
1451     for (j=0; j<len; j++) {
1452       rows[0] = (*aj++)*bs;
1453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1454       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1455       array += bs2;
1456     }
1457   }
1458   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1459 
1460   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462 
1463   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1464     *B = C;
1465   } else {
1466     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1478   int            fd;
1479   PetscScalar    *aa;
1480   FILE           *file;
1481 
1482   PetscFunctionBegin;
1483   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1484   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1485   col_lens[0] = MAT_FILE_CLASSID;
1486 
1487   col_lens[1] = A->rmap->N;
1488   col_lens[2] = A->cmap->n;
1489   col_lens[3] = a->nz*bs2;
1490 
1491   /* store lengths of each row and write (including header) to file */
1492   count = 0;
1493   for (i=0; i<a->mbs; i++) {
1494     for (j=0; j<bs; j++) {
1495       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1496     }
1497   }
1498   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1499   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1500 
1501   /* store column indices (zero start index) */
1502   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1503   count = 0;
1504   for (i=0; i<a->mbs; i++) {
1505     for (j=0; j<bs; j++) {
1506       for (k=a->i[i]; k<a->i[i+1]; k++) {
1507         for (l=0; l<bs; l++) {
1508           jj[count++] = bs*a->j[k] + l;
1509         }
1510       }
1511     }
1512   }
1513   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1514   ierr = PetscFree(jj);CHKERRQ(ierr);
1515 
1516   /* store nonzero values */
1517   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1518   count = 0;
1519   for (i=0; i<a->mbs; i++) {
1520     for (j=0; j<bs; j++) {
1521       for (k=a->i[i]; k<a->i[i+1]; k++) {
1522         for (l=0; l<bs; l++) {
1523           aa[count++] = a->a[bs2*k + l*bs + j];
1524         }
1525       }
1526     }
1527   }
1528   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1529   ierr = PetscFree(aa);CHKERRQ(ierr);
1530 
1531   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1532   if (file) {
1533     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1541 {
1542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1543   PetscErrorCode    ierr;
1544   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1545   PetscViewerFormat format;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1552     Mat aij;
1553     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1555     ierr = MatDestroy(aij);CHKERRQ(ierr);
1556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1557      PetscFunctionReturn(0);
1558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             if (a->a[bs2*k + l*bs + j] != 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1578             }
1579 #endif
1580           }
1581         }
1582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1583       }
1584     }
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1586   } else {
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1588     for (i=0; i<a->mbs; i++) {
1589       for (j=0; j<bs; j++) {
1590         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1591         for (k=a->i[i]; k<a->i[i+1]; k++) {
1592           for (l=0; l<bs; l++) {
1593 #if defined(PETSC_USE_COMPLEX)
1594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1595               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1598               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             } else {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             }
1603 #else
1604             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1612   }
1613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1620 {
1621   Mat            A = (Mat) Aa;
1622   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1623   PetscErrorCode ierr;
1624   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1625   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1626   MatScalar      *aa;
1627   PetscViewer    viewer;
1628 
1629   PetscFunctionBegin;
1630 
1631   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1632   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1633 
1634   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1635 
1636   /* loop over matrix elements drawing boxes */
1637   color = PETSC_DRAW_BLUE;
1638   for (i=0,row=0; i<mbs; i++,row+=bs) {
1639     for (j=a->i[i]; j<a->i[i+1]; j++) {
1640       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642       aa = a->a + j*bs2;
1643       for (k=0; k<bs; k++) {
1644         for (l=0; l<bs; l++) {
1645           if (PetscRealPart(*aa++) >=  0.) continue;
1646           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647         }
1648       }
1649     }
1650   }
1651   color = PETSC_DRAW_CYAN;
1652   for (i=0,row=0; i<mbs; i++,row+=bs) {
1653     for (j=a->i[i]; j<a->i[i+1]; j++) {
1654       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656       aa = a->a + j*bs2;
1657       for (k=0; k<bs; k++) {
1658         for (l=0; l<bs; l++) {
1659           if (PetscRealPart(*aa++) != 0.) continue;
1660           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661         }
1662       }
1663     }
1664   }
1665 
1666   color = PETSC_DRAW_RED;
1667   for (i=0,row=0; i<mbs; i++,row+=bs) {
1668     for (j=a->i[i]; j<a->i[i+1]; j++) {
1669       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671       aa = a->a + j*bs2;
1672       for (k=0; k<bs; k++) {
1673         for (l=0; l<bs; l++) {
1674           if (PetscRealPart(*aa++) <= 0.) continue;
1675           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1676         }
1677       }
1678     }
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1686 {
1687   PetscErrorCode ierr;
1688   PetscReal      xl,yl,xr,yr,w,h;
1689   PetscDraw      draw;
1690   PetscTruth     isnull;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1695   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1696 
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1698   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699   xr += w;    yr += h;  xl = -w;     yl = -h;
1700   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1701   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatView_SeqBAIJ"
1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709 {
1710   PetscErrorCode ierr;
1711   PetscTruth     iascii,isbinary,isdraw;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1717   if (iascii){
1718     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1719   } else if (isbinary) {
1720     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1721   } else if (isdraw) {
1722     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1723   } else {
1724     Mat B;
1725     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1726     ierr = MatView(B,viewer);CHKERRQ(ierr);
1727     ierr = MatDestroy(B);CHKERRQ(ierr);
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1736 {
1737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1738   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1739   PetscInt    *ai = a->i,*ailen = a->ilen;
1740   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1741   MatScalar   *ap,*aa = a->a;
1742 
1743   PetscFunctionBegin;
1744   for (k=0; k<m; k++) { /* loop over rows */
1745     row  = im[k]; brow = row/bs;
1746     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1747     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1748     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1749     nrow = ailen[brow];
1750     for (l=0; l<n; l++) { /* loop over columns */
1751       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1752       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1753       col  = in[l] ;
1754       bcol = col/bs;
1755       cidx = col%bs;
1756       ridx = row%bs;
1757       high = nrow;
1758       low  = 0; /* assume unsorted */
1759       while (high-low > 5) {
1760         t = (low+high)/2;
1761         if (rp[t] > bcol) high = t;
1762         else             low  = t;
1763       }
1764       for (i=low; i<high; i++) {
1765         if (rp[i] > bcol) break;
1766         if (rp[i] == bcol) {
1767           *v++ = ap[bs2*i+bs*cidx+ridx];
1768           goto finished;
1769         }
1770       }
1771       *v++ = 0.0;
1772       finished:;
1773     }
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1780 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1781 {
1782   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1783   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1784   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1785   PetscErrorCode    ierr;
1786   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1787   PetscTruth        roworiented=a->roworiented;
1788   const PetscScalar *value = v;
1789   MatScalar         *ap,*aa = a->a,*bap;
1790 
1791   PetscFunctionBegin;
1792   if (roworiented) {
1793     stepval = (n-1)*bs;
1794   } else {
1795     stepval = (m-1)*bs;
1796   }
1797   for (k=0; k<m; k++) { /* loop over added rows */
1798     row  = im[k];
1799     if (row < 0) continue;
1800 #if defined(PETSC_USE_DEBUG)
1801     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1802 #endif
1803     rp   = aj + ai[row];
1804     ap   = aa + bs2*ai[row];
1805     rmax = imax[row];
1806     nrow = ailen[row];
1807     low  = 0;
1808     high = nrow;
1809     for (l=0; l<n; l++) { /* loop over added columns */
1810       if (in[l] < 0) continue;
1811 #if defined(PETSC_USE_DEBUG)
1812       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1813 #endif
1814       col = in[l];
1815       if (roworiented) {
1816         value = v + k*(stepval+bs)*bs + l*bs;
1817       } else {
1818         value = v + l*(stepval+bs)*bs + k*bs;
1819       }
1820       if (col <= lastcol) low = 0; else high = nrow;
1821       lastcol = col;
1822       while (high-low > 7) {
1823         t = (low+high)/2;
1824         if (rp[t] > col) high = t;
1825         else             low  = t;
1826       }
1827       for (i=low; i<high; i++) {
1828         if (rp[i] > col) break;
1829         if (rp[i] == col) {
1830           bap  = ap +  bs2*i;
1831           if (roworiented) {
1832             if (is == ADD_VALUES) {
1833               for (ii=0; ii<bs; ii++,value+=stepval) {
1834                 for (jj=ii; jj<bs2; jj+=bs) {
1835                   bap[jj] += *value++;
1836                 }
1837               }
1838             } else {
1839               for (ii=0; ii<bs; ii++,value+=stepval) {
1840                 for (jj=ii; jj<bs2; jj+=bs) {
1841                   bap[jj] = *value++;
1842                 }
1843               }
1844             }
1845           } else {
1846             if (is == ADD_VALUES) {
1847               for (ii=0; ii<bs; ii++,value+=stepval) {
1848                 for (jj=0; jj<bs; jj++) {
1849                   *bap++ += *value++;
1850                 }
1851               }
1852             } else {
1853               for (ii=0; ii<bs; ii++,value+=stepval) {
1854                 for (jj=0; jj<bs; jj++) {
1855                   *bap++  = *value++;
1856                 }
1857               }
1858             }
1859           }
1860           goto noinsert2;
1861         }
1862       }
1863       if (nonew == 1) goto noinsert2;
1864       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1865       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1866       N = nrow++ - 1; high++;
1867       /* shift up all the later entries in this row */
1868       for (ii=N; ii>=i; ii--) {
1869         rp[ii+1] = rp[ii];
1870         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1871       }
1872       if (N >= i) {
1873         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1874       }
1875       rp[i] = col;
1876       bap   = ap +  bs2*i;
1877       if (roworiented) {
1878         for (ii=0; ii<bs; ii++,value+=stepval) {
1879           for (jj=ii; jj<bs2; jj+=bs) {
1880             bap[jj] = *value++;
1881           }
1882         }
1883       } else {
1884         for (ii=0; ii<bs; ii++,value+=stepval) {
1885           for (jj=0; jj<bs; jj++) {
1886             *bap++  = *value++;
1887           }
1888         }
1889       }
1890       noinsert2:;
1891       low = i;
1892     }
1893     ailen[row] = nrow;
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1900 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1901 {
1902   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1903   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1904   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1905   PetscErrorCode ierr;
1906   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1907   MatScalar      *aa = a->a,*ap;
1908   PetscReal      ratio=0.6;
1909 
1910   PetscFunctionBegin;
1911   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1912 
1913   if (m) rmax = ailen[0];
1914   for (i=1; i<mbs; i++) {
1915     /* move each row back by the amount of empty slots (fshift) before it*/
1916     fshift += imax[i-1] - ailen[i-1];
1917     rmax   = PetscMax(rmax,ailen[i]);
1918     if (fshift) {
1919       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1920       N = ailen[i];
1921       for (j=0; j<N; j++) {
1922         ip[j-fshift] = ip[j];
1923         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1924       }
1925     }
1926     ai[i] = ai[i-1] + ailen[i-1];
1927   }
1928   if (mbs) {
1929     fshift += imax[mbs-1] - ailen[mbs-1];
1930     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1931   }
1932   /* reset ilen and imax for each row */
1933   for (i=0; i<mbs; i++) {
1934     ailen[i] = imax[i] = ai[i+1] - ai[i];
1935   }
1936   a->nz = ai[mbs];
1937 
1938   /* diagonals may have moved, so kill the diagonal pointers */
1939   a->idiagvalid = PETSC_FALSE;
1940   if (fshift && a->diag) {
1941     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1942     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1943     a->diag = 0;
1944   }
1945   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1946   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1947   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1948   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1949   A->info.mallocs     += a->reallocs;
1950   a->reallocs          = 0;
1951   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1952 
1953   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1954   if (a->compressedrow.use){
1955     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1956   }
1957 
1958   A->same_nonzero = PETSC_TRUE;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 /*
1963    This function returns an array of flags which indicate the locations of contiguous
1964    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1965    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1966    Assume: sizes should be long enough to hold all the values.
1967 */
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1970 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1971 {
1972   PetscInt   i,j,k,row;
1973   PetscTruth flg;
1974 
1975   PetscFunctionBegin;
1976   for (i=0,j=0; i<n; j++) {
1977     row = idx[i];
1978     if (row%bs!=0) { /* Not the begining of a block */
1979       sizes[j] = 1;
1980       i++;
1981     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1982       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1983       i++;
1984     } else { /* Begining of the block, so check if the complete block exists */
1985       flg = PETSC_TRUE;
1986       for (k=1; k<bs; k++) {
1987         if (row+k != idx[i+k]) { /* break in the block */
1988           flg = PETSC_FALSE;
1989           break;
1990         }
1991       }
1992       if (flg) { /* No break in the bs */
1993         sizes[j] = bs;
1994         i+= bs;
1995       } else {
1996         sizes[j] = 1;
1997         i++;
1998       }
1999     }
2000   }
2001   *bs_max = j;
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2007 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2008 {
2009   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2010   PetscErrorCode ierr;
2011   PetscInt       i,j,k,count,*rows;
2012   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2013   PetscScalar    zero = 0.0;
2014   MatScalar      *aa;
2015 
2016   PetscFunctionBegin;
2017   /* Make a copy of the IS and  sort it */
2018   /* allocate memory for rows,sizes */
2019   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2020 
2021   /* copy IS values to rows, and sort them */
2022   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2023   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2024   if (baij->keepnonzeropattern) {
2025     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2026     bs_max = is_n;
2027     A->same_nonzero = PETSC_TRUE;
2028   } else {
2029     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2030     A->same_nonzero = PETSC_FALSE;
2031   }
2032 
2033   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2034     row   = rows[j];
2035     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2036     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2037     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2038     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2039       if (diag != 0.0) {
2040         if (baij->ilen[row/bs] > 0) {
2041           baij->ilen[row/bs]       = 1;
2042           baij->j[baij->i[row/bs]] = row/bs;
2043           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2044         }
2045         /* Now insert all the diagonal values for this bs */
2046         for (k=0; k<bs; k++) {
2047           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2048         }
2049       } else { /* (diag == 0.0) */
2050         baij->ilen[row/bs] = 0;
2051       } /* end (diag == 0.0) */
2052     } else { /* (sizes[i] != bs) */
2053 #if defined (PETSC_USE_DEBUG)
2054       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2055 #endif
2056       for (k=0; k<count; k++) {
2057         aa[0] =  zero;
2058         aa    += bs;
2059       }
2060       if (diag != 0.0) {
2061         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2062       }
2063     }
2064   }
2065 
2066   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2067   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2073 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2074 {
2075   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2076   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2077   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2078   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2079   PetscErrorCode ierr;
2080   PetscInt       ridx,cidx,bs2=a->bs2;
2081   PetscTruth     roworiented=a->roworiented;
2082   MatScalar      *ap,value,*aa=a->a,*bap;
2083 
2084   PetscFunctionBegin;
2085   if (v) PetscValidScalarPointer(v,6);
2086   for (k=0; k<m; k++) { /* loop over added rows */
2087     row  = im[k];
2088     brow = row/bs;
2089     if (row < 0) continue;
2090 #if defined(PETSC_USE_DEBUG)
2091     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2092 #endif
2093     rp   = aj + ai[brow];
2094     ap   = aa + bs2*ai[brow];
2095     rmax = imax[brow];
2096     nrow = ailen[brow];
2097     low  = 0;
2098     high = nrow;
2099     for (l=0; l<n; l++) { /* loop over added columns */
2100       if (in[l] < 0) continue;
2101 #if defined(PETSC_USE_DEBUG)
2102       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2103 #endif
2104       col = in[l]; bcol = col/bs;
2105       ridx = row % bs; cidx = col % bs;
2106       if (roworiented) {
2107         value = v[l + k*n];
2108       } else {
2109         value = v[k + l*m];
2110       }
2111       if (col <= lastcol) low = 0; else high = nrow;
2112       lastcol = col;
2113       while (high-low > 7) {
2114         t = (low+high)/2;
2115         if (rp[t] > bcol) high = t;
2116         else              low  = t;
2117       }
2118       for (i=low; i<high; i++) {
2119         if (rp[i] > bcol) break;
2120         if (rp[i] == bcol) {
2121           bap  = ap +  bs2*i + bs*cidx + ridx;
2122           if (is == ADD_VALUES) *bap += value;
2123           else                  *bap  = value;
2124           goto noinsert1;
2125         }
2126       }
2127       if (nonew == 1) goto noinsert1;
2128       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2129       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2130       N = nrow++ - 1; high++;
2131       /* shift up all the later entries in this row */
2132       for (ii=N; ii>=i; ii--) {
2133         rp[ii+1] = rp[ii];
2134         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2135       }
2136       if (N>=i) {
2137         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2138       }
2139       rp[i]                      = bcol;
2140       ap[bs2*i + bs*cidx + ridx] = value;
2141       a->nz++;
2142       noinsert1:;
2143       low = i;
2144     }
2145     ailen[brow] = nrow;
2146   }
2147   A->same_nonzero = PETSC_FALSE;
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2153 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2154 {
2155   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2156   Mat            outA;
2157   PetscErrorCode ierr;
2158   PetscTruth     row_identity,col_identity;
2159 
2160   PetscFunctionBegin;
2161   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2162   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2163   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2164   if (!row_identity || !col_identity) {
2165     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2166   }
2167 
2168   outA            = inA;
2169   inA->factortype = MAT_FACTOR_LU;
2170 
2171   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2172 
2173   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2174   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2175   a->row = row;
2176   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2177   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2178   a->col = col;
2179 
2180   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2181   if (a->icol) {
2182     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2183   }
2184   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2185   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2186 
2187   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2188   if (!a->solve_work) {
2189     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2190     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2191   }
2192   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2193 
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 EXTERN_C_BEGIN
2198 #undef __FUNCT__
2199 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2200 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2201 {
2202   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2203   PetscInt    i,nz,mbs;
2204 
2205   PetscFunctionBegin;
2206   nz  = baij->maxnz/baij->bs2;
2207   mbs = baij->mbs;
2208   for (i=0; i<nz; i++) {
2209     baij->j[i] = indices[i];
2210   }
2211   baij->nz = nz;
2212   for (i=0; i<mbs; i++) {
2213     baij->ilen[i] = baij->imax[i];
2214   }
2215   PetscFunctionReturn(0);
2216 }
2217 EXTERN_C_END
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2221 /*@
2222     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2223        in the matrix.
2224 
2225   Input Parameters:
2226 +  mat - the SeqBAIJ matrix
2227 -  indices - the column indices
2228 
2229   Level: advanced
2230 
2231   Notes:
2232     This can be called if you have precomputed the nonzero structure of the
2233   matrix and want to provide it to the matrix object to improve the performance
2234   of the MatSetValues() operation.
2235 
2236     You MUST have set the correct numbers of nonzeros per row in the call to
2237   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2238 
2239     MUST be called before any calls to MatSetValues();
2240 
2241 @*/
2242 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2243 {
2244   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2245 
2246   PetscFunctionBegin;
2247   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2248   PetscValidPointer(indices,2);
2249   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2250   if (f) {
2251     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2252   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2258 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2259 {
2260   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2261   PetscErrorCode ierr;
2262   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2263   PetscReal      atmp;
2264   PetscScalar    *x,zero = 0.0;
2265   MatScalar      *aa;
2266   PetscInt       ncols,brow,krow,kcol;
2267 
2268   PetscFunctionBegin;
2269   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2270   bs   = A->rmap->bs;
2271   aa   = a->a;
2272   ai   = a->i;
2273   aj   = a->j;
2274   mbs  = a->mbs;
2275 
2276   ierr = VecSet(v,zero);CHKERRQ(ierr);
2277   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2278   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2279   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2280   for (i=0; i<mbs; i++) {
2281     ncols = ai[1] - ai[0]; ai++;
2282     brow  = bs*i;
2283     for (j=0; j<ncols; j++){
2284       for (kcol=0; kcol<bs; kcol++){
2285         for (krow=0; krow<bs; krow++){
2286           atmp = PetscAbsScalar(*aa);aa++;
2287           row = brow + krow;    /* row index */
2288           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2289           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2290         }
2291       }
2292       aj++;
2293     }
2294   }
2295   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatCopy_SeqBAIJ"
2301 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2302 {
2303   PetscErrorCode ierr;
2304 
2305   PetscFunctionBegin;
2306   /* If the two matrices have the same copy implementation, use fast copy. */
2307   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2308     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2309     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2310 
2311     if (a->i[A->rmap->N] != b->i[B->rmap->N]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2312     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2313   } else {
2314     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2315   }
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2321 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2322 {
2323   PetscErrorCode ierr;
2324 
2325   PetscFunctionBegin;
2326   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2332 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2333 {
2334   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2335   PetscFunctionBegin;
2336   *array = a->a;
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2342 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2343 {
2344   PetscFunctionBegin;
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2350 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2351 {
2352   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2353   PetscErrorCode ierr;
2354   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2355   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2356 
2357   PetscFunctionBegin;
2358   if (str == SAME_NONZERO_PATTERN) {
2359     PetscScalar alpha = a;
2360     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2361   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2362     if (y->xtoy && y->XtoY != X) {
2363       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2364       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2365     }
2366     if (!y->xtoy) { /* get xtoy */
2367       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2368       y->XtoY = X;
2369       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2370     }
2371     bs2 = bs*bs;
2372     for (i=0; i<x->nz; i++) {
2373       j = 0;
2374       while (j < bs2){
2375         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2376         j++;
2377       }
2378     }
2379     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2380   } else {
2381     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2382   }
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2388 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2389 {
2390   PetscInt rbs,cbs;
2391   PetscErrorCode ierr;
2392 
2393   PetscFunctionBegin;
2394   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2395   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2396   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2397   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2403 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2404 {
2405   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2406   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2407   MatScalar      *aa = a->a;
2408 
2409   PetscFunctionBegin;
2410   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2416 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2417 {
2418   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2419   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2420   MatScalar      *aa = a->a;
2421 
2422   PetscFunctionBegin;
2423   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2431 /*
2432     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2433 */
2434 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2435 {
2436   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2437   PetscErrorCode ierr;
2438   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2439   PetscInt       nz = a->i[m],row,*jj,mr,col;
2440 
2441   PetscFunctionBegin;
2442   *nn = n;
2443   if (!ia) PetscFunctionReturn(0);
2444   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2445   else {
2446     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2447     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2448     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2449     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2450     jj = a->j;
2451     for (i=0; i<nz; i++) {
2452       collengths[jj[i]]++;
2453     }
2454     cia[0] = oshift;
2455     for (i=0; i<n; i++) {
2456       cia[i+1] = cia[i] + collengths[i];
2457     }
2458     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2459     jj   = a->j;
2460     for (row=0; row<m; row++) {
2461       mr = a->i[row+1] - a->i[row];
2462       for (i=0; i<mr; i++) {
2463         col = *jj++;
2464         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2465       }
2466     }
2467     ierr = PetscFree(collengths);CHKERRQ(ierr);
2468     *ia = cia; *ja = cja;
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2475 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2476 {
2477   PetscErrorCode ierr;
2478 
2479   PetscFunctionBegin;
2480   if (!ia) PetscFunctionReturn(0);
2481   ierr = PetscFree(*ia);CHKERRQ(ierr);
2482   ierr = PetscFree(*ja);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2488 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2489 {
2490   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2491   PetscErrorCode ierr;
2492   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2493   PetscScalar    dx,*y,*xx,*w3_array;
2494   PetscScalar    *vscale_array;
2495   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2496   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2497   void           *fctx = coloring->fctx;
2498   PetscTruth     flg = PETSC_FALSE;
2499   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2500   Vec            x1_tmp;
2501 
2502   PetscFunctionBegin;
2503   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2504   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2505   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2506   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2507 
2508   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2509   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2510   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2511   if (flg) {
2512     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2513   } else {
2514     PetscTruth assembled;
2515     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2516     if (assembled) {
2517       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2518     }
2519   }
2520 
2521   x1_tmp = x1;
2522   if (!coloring->vscale){
2523     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2524   }
2525 
2526   /*
2527     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2528     coloring->F for the coarser grids from the finest
2529   */
2530   if (coloring->F) {
2531     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2532     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2533     if (m1 != m2) {
2534       coloring->F = 0;
2535       }
2536     }
2537 
2538   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2539     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2540   }
2541   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2542 
2543   /* Set w1 = F(x1) */
2544   if (coloring->F) {
2545     w1          = coloring->F; /* use already computed value of function */
2546     coloring->F = 0;
2547   } else {
2548     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2549     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2550     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2551   }
2552 
2553   if (!coloring->w3) {
2554     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2555     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2556   }
2557   w3 = coloring->w3;
2558 
2559     CHKMEMQ;
2560     /* Compute all the local scale factors, including ghost points */
2561   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2562   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2563   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2564   if (ctype == IS_COLORING_GHOSTED){
2565     col_start = 0; col_end = N;
2566   } else if (ctype == IS_COLORING_GLOBAL){
2567     xx = xx - start;
2568     vscale_array = vscale_array - start;
2569     col_start = start; col_end = N + start;
2570   }    CHKMEMQ;
2571   for (col=col_start; col<col_end; col++){
2572     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2573     if (coloring->htype[0] == 'w') {
2574       dx = 1.0 + unorm;
2575     } else {
2576       dx  = xx[col];
2577     }
2578     if (dx == 0.0) dx = 1.0;
2579 #if !defined(PETSC_USE_COMPLEX)
2580     if (dx < umin && dx >= 0.0)      dx = umin;
2581     else if (dx < 0.0 && dx > -umin) dx = -umin;
2582 #else
2583     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2584     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2585 #endif
2586     dx               *= epsilon;
2587     vscale_array[col] = 1.0/dx;
2588   }     CHKMEMQ;
2589   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2590   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2591   if (ctype == IS_COLORING_GLOBAL){
2592     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2593     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2594   }
2595   CHKMEMQ;
2596   if (coloring->vscaleforrow) {
2597     vscaleforrow = coloring->vscaleforrow;
2598   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2599 
2600   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2601   /*
2602     Loop over each color
2603   */
2604   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2605   for (k=0; k<coloring->ncolors; k++) {
2606     coloring->currentcolor = k;
2607     for (i=0; i<bs; i++) {
2608       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2609       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2610       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2611       /*
2612 	Loop over each column associated with color
2613 	adding the perturbation to the vector w3.
2614       */
2615       for (l=0; l<coloring->ncolumns[k]; l++) {
2616 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2617 	if (coloring->htype[0] == 'w') {
2618 	  dx = 1.0 + unorm;
2619 	} else {
2620 	  dx  = xx[col];
2621 	}
2622 	if (dx == 0.0) dx = 1.0;
2623 #if !defined(PETSC_USE_COMPLEX)
2624 	if (dx < umin && dx >= 0.0)      dx = umin;
2625 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2626 #else
2627 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2628 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2629 #endif
2630 	dx            *= epsilon;
2631 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2632 	w3_array[col] += dx;
2633       }
2634       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2635       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2636 
2637       /*
2638 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2639 	w2 = F(x1 + dx) - F(x1)
2640       */
2641       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2642       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2643       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2644       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2645 
2646       /*
2647 	Loop over rows of vector, putting results into Jacobian matrix
2648       */
2649       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2650       for (l=0; l<coloring->nrows[k]; l++) {
2651 	row    = bs*coloring->rows[k][l];             /* local row index */
2652 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2653         for (j=0; j<bs; j++) {
2654   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2655           srows[j]  = row + start + j;
2656         }
2657 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2658       }
2659       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2660     }
2661   } /* endof for each color */
2662   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2663   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2664   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2665   ierr = PetscFree(srows);CHKERRQ(ierr);
2666 
2667   coloring->currentcolor = -1;
2668   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2669   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2670   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 /* -------------------------------------------------------------------*/
2675 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2676        MatGetRow_SeqBAIJ,
2677        MatRestoreRow_SeqBAIJ,
2678        MatMult_SeqBAIJ_N,
2679 /* 4*/ MatMultAdd_SeqBAIJ_N,
2680        MatMultTranspose_SeqBAIJ,
2681        MatMultTransposeAdd_SeqBAIJ,
2682        0,
2683        0,
2684        0,
2685 /*10*/ 0,
2686        MatLUFactor_SeqBAIJ,
2687        0,
2688        0,
2689        MatTranspose_SeqBAIJ,
2690 /*15*/ MatGetInfo_SeqBAIJ,
2691        MatEqual_SeqBAIJ,
2692        MatGetDiagonal_SeqBAIJ,
2693        MatDiagonalScale_SeqBAIJ,
2694        MatNorm_SeqBAIJ,
2695 /*20*/ 0,
2696        MatAssemblyEnd_SeqBAIJ,
2697        MatSetOption_SeqBAIJ,
2698        MatZeroEntries_SeqBAIJ,
2699 /*24*/ MatZeroRows_SeqBAIJ,
2700        0,
2701        0,
2702        0,
2703        0,
2704 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2705        0,
2706        0,
2707        MatGetArray_SeqBAIJ,
2708        MatRestoreArray_SeqBAIJ,
2709 /*34*/ MatDuplicate_SeqBAIJ,
2710        0,
2711        0,
2712        MatILUFactor_SeqBAIJ,
2713        0,
2714 /*39*/ MatAXPY_SeqBAIJ,
2715        MatGetSubMatrices_SeqBAIJ,
2716        MatIncreaseOverlap_SeqBAIJ,
2717        MatGetValues_SeqBAIJ,
2718        MatCopy_SeqBAIJ,
2719 /*44*/ 0,
2720        MatScale_SeqBAIJ,
2721        0,
2722        0,
2723        0,
2724 /*49*/ MatSetBlockSize_SeqBAIJ,
2725        MatGetRowIJ_SeqBAIJ,
2726        MatRestoreRowIJ_SeqBAIJ,
2727        MatGetColumnIJ_SeqBAIJ,
2728        MatRestoreColumnIJ_SeqBAIJ,
2729 /*54*/ MatFDColoringCreate_SeqAIJ,
2730        0,
2731        0,
2732        0,
2733        MatSetValuesBlocked_SeqBAIJ,
2734 /*59*/ MatGetSubMatrix_SeqBAIJ,
2735        MatDestroy_SeqBAIJ,
2736        MatView_SeqBAIJ,
2737        0,
2738        0,
2739 /*64*/ 0,
2740        0,
2741        0,
2742        0,
2743        0,
2744 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2745        0,
2746        MatConvert_Basic,
2747        0,
2748        0,
2749 /*74*/ 0,
2750        MatFDColoringApply_BAIJ,
2751        0,
2752        0,
2753        0,
2754 /*79*/ 0,
2755        0,
2756        0,
2757        0,
2758        0,
2759 /*84*/ 0,
2760        0,
2761        0,
2762        0,
2763        0,
2764 /*89*/ 0,
2765        0,
2766        0,
2767        0,
2768        0,
2769 /*94*/ 0,
2770        0,
2771        0,
2772        0,
2773        0,
2774 /*99*/0,
2775        0,
2776        0,
2777        0,
2778        0,
2779 /*104*/0,
2780        MatRealPart_SeqBAIJ,
2781        MatImaginaryPart_SeqBAIJ,
2782        0,
2783        0,
2784 /*109*/0,
2785        0,
2786        0,
2787        0,
2788        MatMissingDiagonal_SeqBAIJ,
2789 /*114*/0,
2790        0,
2791        0,
2792        0,
2793        0,
2794 /*119*/0,
2795        0,
2796        MatMultHermitianTranspose_SeqBAIJ,
2797        MatMultHermitianTransposeAdd_SeqBAIJ,
2798        MatLoadnew_SeqBAIJ
2799 };
2800 
2801 EXTERN_C_BEGIN
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2804 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2805 {
2806   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2807   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2812 
2813   /* allocate space for values if not already there */
2814   if (!aij->saved_values) {
2815     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2816     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2817   }
2818 
2819   /* copy values over */
2820   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2821   PetscFunctionReturn(0);
2822 }
2823 EXTERN_C_END
2824 
2825 EXTERN_C_BEGIN
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2828 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2829 {
2830   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2831   PetscErrorCode ierr;
2832   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2833 
2834   PetscFunctionBegin;
2835   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2836   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2837 
2838   /* copy values over */
2839   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2840   PetscFunctionReturn(0);
2841 }
2842 EXTERN_C_END
2843 
2844 EXTERN_C_BEGIN
2845 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2846 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2847 EXTERN_C_END
2848 
2849 EXTERN_C_BEGIN
2850 #undef __FUNCT__
2851 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2852 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2853 {
2854   Mat_SeqBAIJ    *b;
2855   PetscErrorCode ierr;
2856   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2857   PetscTruth     flg,skipallocation = PETSC_FALSE;
2858 
2859   PetscFunctionBegin;
2860 
2861   if (nz == MAT_SKIP_ALLOCATION) {
2862     skipallocation = PETSC_TRUE;
2863     nz             = 0;
2864   }
2865 
2866   if (bs < 0) {
2867     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2868       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2869     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2870     bs   = PetscAbs(bs);
2871   }
2872   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2873   bs   = newbs;
2874 
2875   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2876   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2877   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2878   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2879 
2880   B->preallocated = PETSC_TRUE;
2881 
2882   mbs  = B->rmap->n/bs;
2883   nbs  = B->cmap->n/bs;
2884   bs2  = bs*bs;
2885 
2886   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2887 
2888   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2889   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2890   if (nnz) {
2891     for (i=0; i<mbs; i++) {
2892       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2893       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2894     }
2895   }
2896 
2897   b       = (Mat_SeqBAIJ*)B->data;
2898   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2899     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2900   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2901 
2902   if (!flg) {
2903     switch (bs) {
2904     case 1:
2905       B->ops->mult            = MatMult_SeqBAIJ_1;
2906       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2907       B->ops->sor             = MatSOR_SeqBAIJ_1;
2908       break;
2909     case 2:
2910       B->ops->mult            = MatMult_SeqBAIJ_2;
2911       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2912       B->ops->sor             = MatSOR_SeqBAIJ_2;
2913       break;
2914     case 3:
2915       B->ops->mult            = MatMult_SeqBAIJ_3;
2916       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2917       B->ops->sor             = MatSOR_SeqBAIJ_3;
2918       break;
2919     case 4:
2920       B->ops->mult            = MatMult_SeqBAIJ_4;
2921       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2922       B->ops->sor             = MatSOR_SeqBAIJ_4;
2923       break;
2924     case 5:
2925       B->ops->mult            = MatMult_SeqBAIJ_5;
2926       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2927       B->ops->sor             = MatSOR_SeqBAIJ_5;
2928       break;
2929     case 6:
2930       B->ops->mult            = MatMult_SeqBAIJ_6;
2931       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2932       B->ops->sor             = MatSOR_SeqBAIJ_6;
2933       break;
2934     case 7:
2935       B->ops->mult            = MatMult_SeqBAIJ_7;
2936       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2937       B->ops->sor             = MatSOR_SeqBAIJ_7;
2938       break;
2939     case 15:
2940       B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2941       break;
2942     default:
2943       B->ops->mult            = MatMult_SeqBAIJ_N;
2944       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2945       break;
2946     }
2947   }
2948   B->rmap->bs      = bs;
2949   b->mbs     = mbs;
2950   b->nbs     = nbs;
2951   if (!skipallocation) {
2952     if (!b->imax) {
2953       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2954       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2955       b->free_imax_ilen = PETSC_TRUE;
2956     }
2957     /* b->ilen will count nonzeros in each block row so far. */
2958     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2959     if (!nnz) {
2960       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2961       else if (nz <= 0)        nz = 1;
2962       for (i=0; i<mbs; i++) b->imax[i] = nz;
2963       nz = nz*mbs;
2964     } else {
2965       nz = 0;
2966       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2967     }
2968 
2969     /* allocate the matrix space */
2970     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2971     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2972     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2973     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2974     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2975     b->singlemalloc = PETSC_TRUE;
2976     b->i[0] = 0;
2977     for (i=1; i<mbs+1; i++) {
2978       b->i[i] = b->i[i-1] + b->imax[i-1];
2979     }
2980     b->free_a     = PETSC_TRUE;
2981     b->free_ij    = PETSC_TRUE;
2982   } else {
2983     b->free_a     = PETSC_FALSE;
2984     b->free_ij    = PETSC_FALSE;
2985   }
2986 
2987   B->rmap->bs          = bs;
2988   b->bs2              = bs2;
2989   b->mbs              = mbs;
2990   b->nz               = 0;
2991   b->maxnz            = nz*bs2;
2992   B->info.nz_unneeded = (PetscReal)b->maxnz;
2993   PetscFunctionReturn(0);
2994 }
2995 EXTERN_C_END
2996 
2997 EXTERN_C_BEGIN
2998 #undef __FUNCT__
2999 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3000 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3001 {
3002   PetscInt       i,m,nz,nz_max=0,*nnz;
3003   PetscScalar    *values=0;
3004   PetscErrorCode ierr;
3005 
3006   PetscFunctionBegin;
3007   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3008   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3009   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3010   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3011   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3012   m = B->rmap->n/bs;
3013 
3014   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3015   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3016   for(i=0; i<m; i++) {
3017     nz = ii[i+1]- ii[i];
3018     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3019     nz_max = PetscMax(nz_max, nz);
3020     nnz[i] = nz;
3021   }
3022   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3023   ierr = PetscFree(nnz);CHKERRQ(ierr);
3024 
3025   values = (PetscScalar*)V;
3026   if (!values) {
3027     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3028     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3029   }
3030   for (i=0; i<m; i++) {
3031     PetscInt          ncols  = ii[i+1] - ii[i];
3032     const PetscInt    *icols = jj + ii[i];
3033     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3034     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3035   }
3036   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3037   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3038   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039 
3040   PetscFunctionReturn(0);
3041 }
3042 EXTERN_C_END
3043 
3044 
3045 EXTERN_C_BEGIN
3046 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3047 #if defined(PETSC_HAVE_MUMPS)
3048 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3049 #endif
3050 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3051 EXTERN_C_END
3052 
3053 /*MC
3054    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3055    block sparse compressed row format.
3056 
3057    Options Database Keys:
3058 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3059 
3060   Level: beginner
3061 
3062 .seealso: MatCreateSeqBAIJ()
3063 M*/
3064 
3065 
3066 EXTERN_C_BEGIN
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatCreate_SeqBAIJ"
3069 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3070 {
3071   PetscErrorCode ierr;
3072   PetscMPIInt    size;
3073   Mat_SeqBAIJ    *b;
3074 
3075   PetscFunctionBegin;
3076   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3077   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3078 
3079   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3080   B->data = (void*)b;
3081   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3082   B->mapping               = 0;
3083   b->row                   = 0;
3084   b->col                   = 0;
3085   b->icol                  = 0;
3086   b->reallocs              = 0;
3087   b->saved_values          = 0;
3088 
3089   b->roworiented           = PETSC_TRUE;
3090   b->nonew                 = 0;
3091   b->diag                  = 0;
3092   b->solve_work            = 0;
3093   b->mult_work             = 0;
3094   B->spptr                 = 0;
3095   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3096   b->keepnonzeropattern    = PETSC_FALSE;
3097   b->xtoy                  = 0;
3098   b->XtoY                  = 0;
3099   b->compressedrow.use     = PETSC_FALSE;
3100   b->compressedrow.nrows   = 0;
3101   b->compressedrow.i       = PETSC_NULL;
3102   b->compressedrow.rindex  = PETSC_NULL;
3103   b->compressedrow.checked = PETSC_FALSE;
3104   B->same_nonzero          = PETSC_FALSE;
3105 
3106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3107                                      "MatGetFactorAvailable_seqbaij_petsc",
3108                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3110                                      "MatGetFactor_seqbaij_petsc",
3111                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3112 #if defined(PETSC_HAVE_MUMPS)
3113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3114 #endif
3115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3116                                      "MatInvertBlockDiagonal_SeqBAIJ",
3117                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3119                                      "MatStoreValues_SeqBAIJ",
3120                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3122                                      "MatRetrieveValues_SeqBAIJ",
3123                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3125                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3126                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3127   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3128                                      "MatConvert_SeqBAIJ_SeqAIJ",
3129                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3130   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3131                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3132                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3133   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3134                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3135                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3136   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3137                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3138                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3139   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3140   PetscFunctionReturn(0);
3141 }
3142 EXTERN_C_END
3143 
3144 #undef __FUNCT__
3145 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3146 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3147 {
3148   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3149   PetscErrorCode ierr;
3150   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3151 
3152   PetscFunctionBegin;
3153   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3154 
3155   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3156     c->imax = a->imax;
3157     c->ilen = a->ilen;
3158     c->free_imax_ilen = PETSC_FALSE;
3159   } else {
3160     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3161     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3162     for (i=0; i<mbs; i++) {
3163       c->imax[i] = a->imax[i];
3164       c->ilen[i] = a->ilen[i];
3165     }
3166     c->free_imax_ilen = PETSC_TRUE;
3167   }
3168 
3169   /* allocate the matrix space */
3170   if (mallocmatspace){
3171     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3172       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3173       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3174       c->singlemalloc = PETSC_FALSE;
3175       c->free_ij      = PETSC_FALSE;
3176       c->i            = a->i;
3177       c->j            = a->j;
3178       c->parent       = A;
3179       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3180       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3181       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3182     } else {
3183       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3184       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3185       c->singlemalloc = PETSC_TRUE;
3186       c->free_ij      = PETSC_TRUE;
3187       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3188       if (mbs > 0) {
3189 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3190 	if (cpvalues == MAT_COPY_VALUES) {
3191 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3192 	} else {
3193 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3194 	}
3195       }
3196     }
3197   }
3198 
3199   c->roworiented = a->roworiented;
3200   c->nonew       = a->nonew;
3201   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3202   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3203   c->bs2         = a->bs2;
3204   c->mbs         = a->mbs;
3205   c->nbs         = a->nbs;
3206 
3207   if (a->diag) {
3208     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3209       c->diag      = a->diag;
3210       c->free_diag = PETSC_FALSE;
3211     } else {
3212       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3213       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3214       for (i=0; i<mbs; i++) {
3215         c->diag[i] = a->diag[i];
3216       }
3217       c->free_diag = PETSC_TRUE;
3218     }
3219   } else c->diag        = 0;
3220   c->nz                 = a->nz;
3221   c->maxnz              = bs2*a->nz; /* Since we allocate exactly the right amount */
3222   c->solve_work         = 0;
3223   c->mult_work          = 0;
3224   c->free_a             = PETSC_TRUE;
3225   c->free_ij            = PETSC_TRUE;
3226   C->preallocated       = PETSC_TRUE;
3227   C->assembled          = PETSC_TRUE;
3228 
3229   c->compressedrow.use     = a->compressedrow.use;
3230   c->compressedrow.nrows   = a->compressedrow.nrows;
3231   c->compressedrow.checked = a->compressedrow.checked;
3232   if (a->compressedrow.checked && a->compressedrow.use){
3233     i = a->compressedrow.nrows;
3234     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3235     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3236     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3237     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3238   } else {
3239     c->compressedrow.use    = PETSC_FALSE;
3240     c->compressedrow.i      = PETSC_NULL;
3241     c->compressedrow.rindex = PETSC_NULL;
3242   }
3243   C->same_nonzero = A->same_nonzero;
3244   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3245   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3251 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3252 {
3253     PetscErrorCode ierr;
3254 
3255   PetscFunctionBegin;
3256   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3257   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3258   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3259   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatLoad_SeqBAIJ"
3265 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3266 {
3267   Mat_SeqBAIJ    *a;
3268   Mat            B;
3269   PetscErrorCode ierr;
3270   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3271   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3272   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3273   PetscInt       *masked,nmask,tmp,bs2,ishift;
3274   PetscMPIInt    size;
3275   int            fd;
3276   PetscScalar    *aa;
3277   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3278 
3279   PetscFunctionBegin;
3280   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3281     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3282   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3283   bs2  = bs*bs;
3284 
3285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3286   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3287   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3288   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3289   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3290   M = header[1]; N = header[2]; nz = header[3];
3291 
3292   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3293   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3294 
3295   /*
3296      This code adds extra rows to make sure the number of rows is
3297     divisible by the blocksize
3298   */
3299   mbs        = M/bs;
3300   extra_rows = bs - M + bs*(mbs);
3301   if (extra_rows == bs) extra_rows = 0;
3302   else                  mbs++;
3303   if (extra_rows) {
3304     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3305   }
3306 
3307   /* read in row lengths */
3308   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3309   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3310   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3311 
3312   /* read in column indices */
3313   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3314   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3315   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3316 
3317   /* loop over row lengths determining block row lengths */
3318   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3319   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3320   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3321   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3322   rowcount = 0;
3323   nzcount = 0;
3324   for (i=0; i<mbs; i++) {
3325     nmask = 0;
3326     for (j=0; j<bs; j++) {
3327       kmax = rowlengths[rowcount];
3328       for (k=0; k<kmax; k++) {
3329         tmp = jj[nzcount++]/bs;
3330         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3331       }
3332       rowcount++;
3333     }
3334     browlengths[i] += nmask;
3335     /* zero out the mask elements we set */
3336     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3337   }
3338 
3339   /* create our matrix */
3340   ierr = MatCreate(comm,&B);
3341   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3342   ierr = MatSetType(B,type);CHKERRQ(ierr);
3343   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3344   a = (Mat_SeqBAIJ*)B->data;
3345 
3346   /* set matrix "i" values */
3347   a->i[0] = 0;
3348   for (i=1; i<= mbs; i++) {
3349     a->i[i]      = a->i[i-1] + browlengths[i-1];
3350     a->ilen[i-1] = browlengths[i-1];
3351   }
3352   a->nz         = 0;
3353   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3354 
3355   /* read in nonzero values */
3356   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3357   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3358   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3359 
3360   /* set "a" and "j" values into matrix */
3361   nzcount = 0; jcount = 0;
3362   for (i=0; i<mbs; i++) {
3363     nzcountb = nzcount;
3364     nmask    = 0;
3365     for (j=0; j<bs; j++) {
3366       kmax = rowlengths[i*bs+j];
3367       for (k=0; k<kmax; k++) {
3368         tmp = jj[nzcount++]/bs;
3369 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3370       }
3371     }
3372     /* sort the masked values */
3373     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3374 
3375     /* set "j" values into matrix */
3376     maskcount = 1;
3377     for (j=0; j<nmask; j++) {
3378       a->j[jcount++]  = masked[j];
3379       mask[masked[j]] = maskcount++;
3380     }
3381     /* set "a" values into matrix */
3382     ishift = bs2*a->i[i];
3383     for (j=0; j<bs; j++) {
3384       kmax = rowlengths[i*bs+j];
3385       for (k=0; k<kmax; k++) {
3386         tmp       = jj[nzcountb]/bs ;
3387         block     = mask[tmp] - 1;
3388         point     = jj[nzcountb] - bs*tmp;
3389         idx       = ishift + bs2*block + j + bs*point;
3390         a->a[idx] = (MatScalar)aa[nzcountb++];
3391       }
3392     }
3393     /* zero out the mask elements we set */
3394     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3395   }
3396   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3397 
3398   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3399   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3400   ierr = PetscFree(aa);CHKERRQ(ierr);
3401   ierr = PetscFree(jj);CHKERRQ(ierr);
3402   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3403 
3404   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3405   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3406   ierr = MatView_Private(B);CHKERRQ(ierr);
3407 
3408   *A = B;
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 #undef __FUNCT__
3413 #define __FUNCT__ "MatLoadnew_SeqBAIJ"
3414 PetscErrorCode MatLoadnew_SeqBAIJ(PetscViewer viewer,Mat newmat)
3415 {
3416   Mat_SeqBAIJ    *a;
3417   PetscErrorCode ierr;
3418   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3419   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3420   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3421   PetscInt       *masked,nmask,tmp,bs2,ishift;
3422   PetscMPIInt    size;
3423   int            fd;
3424   PetscScalar    *aa;
3425   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3426 
3427   PetscFunctionBegin;
3428   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3429   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3430   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3431   bs2  = bs*bs;
3432 
3433   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3434   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3435   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3436   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3437   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3438   M = header[1]; N = header[2]; nz = header[3];
3439 
3440   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3441   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3442 
3443   /*
3444      This code adds extra rows to make sure the number of rows is
3445     divisible by the blocksize
3446   */
3447   mbs        = M/bs;
3448   extra_rows = bs - M + bs*(mbs);
3449   if (extra_rows == bs) extra_rows = 0;
3450   else                  mbs++;
3451   if (extra_rows) {
3452     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3453   }
3454 
3455   /* Set global sizes if not already set */
3456   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3457     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3458   } else { /* Check if the matrix global sizes are correct */
3459     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3460     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3461   }
3462 
3463   /* read in row lengths */
3464   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3465   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3466   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3467 
3468   /* read in column indices */
3469   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3470   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3471   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3472 
3473   /* loop over row lengths determining block row lengths */
3474   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3475   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3476   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3477   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3478   rowcount = 0;
3479   nzcount = 0;
3480   for (i=0; i<mbs; i++) {
3481     nmask = 0;
3482     for (j=0; j<bs; j++) {
3483       kmax = rowlengths[rowcount];
3484       for (k=0; k<kmax; k++) {
3485         tmp = jj[nzcount++]/bs;
3486         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3487       }
3488       rowcount++;
3489     }
3490     browlengths[i] += nmask;
3491     /* zero out the mask elements we set */
3492     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3493   }
3494 
3495   /* Do preallocation  */
3496   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3497   a = (Mat_SeqBAIJ*)newmat->data;
3498 
3499   /* set matrix "i" values */
3500   a->i[0] = 0;
3501   for (i=1; i<= mbs; i++) {
3502     a->i[i]      = a->i[i-1] + browlengths[i-1];
3503     a->ilen[i-1] = browlengths[i-1];
3504   }
3505   a->nz         = 0;
3506   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3507 
3508   /* read in nonzero values */
3509   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3510   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3511   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3512 
3513   /* set "a" and "j" values into matrix */
3514   nzcount = 0; jcount = 0;
3515   for (i=0; i<mbs; i++) {
3516     nzcountb = nzcount;
3517     nmask    = 0;
3518     for (j=0; j<bs; j++) {
3519       kmax = rowlengths[i*bs+j];
3520       for (k=0; k<kmax; k++) {
3521         tmp = jj[nzcount++]/bs;
3522 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3523       }
3524     }
3525     /* sort the masked values */
3526     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3527 
3528     /* set "j" values into matrix */
3529     maskcount = 1;
3530     for (j=0; j<nmask; j++) {
3531       a->j[jcount++]  = masked[j];
3532       mask[masked[j]] = maskcount++;
3533     }
3534     /* set "a" values into matrix */
3535     ishift = bs2*a->i[i];
3536     for (j=0; j<bs; j++) {
3537       kmax = rowlengths[i*bs+j];
3538       for (k=0; k<kmax; k++) {
3539         tmp       = jj[nzcountb]/bs ;
3540         block     = mask[tmp] - 1;
3541         point     = jj[nzcountb] - bs*tmp;
3542         idx       = ishift + bs2*block + j + bs*point;
3543         a->a[idx] = (MatScalar)aa[nzcountb++];
3544       }
3545     }
3546     /* zero out the mask elements we set */
3547     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3548   }
3549   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3550 
3551   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3552   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3553   ierr = PetscFree(aa);CHKERRQ(ierr);
3554   ierr = PetscFree(jj);CHKERRQ(ierr);
3555   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3556 
3557   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3558   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3559   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 #undef __FUNCT__
3564 #define __FUNCT__ "MatCreateSeqBAIJ"
3565 /*@C
3566    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3567    compressed row) format.  For good matrix assembly performance the
3568    user should preallocate the matrix storage by setting the parameter nz
3569    (or the array nnz).  By setting these parameters accurately, performance
3570    during matrix assembly can be increased by more than a factor of 50.
3571 
3572    Collective on MPI_Comm
3573 
3574    Input Parameters:
3575 +  comm - MPI communicator, set to PETSC_COMM_SELF
3576 .  bs - size of block
3577 .  m - number of rows
3578 .  n - number of columns
3579 .  nz - number of nonzero blocks  per block row (same for all rows)
3580 -  nnz - array containing the number of nonzero blocks in the various block rows
3581          (possibly different for each block row) or PETSC_NULL
3582 
3583    Output Parameter:
3584 .  A - the matrix
3585 
3586    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3587    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3588    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3589 
3590    Options Database Keys:
3591 .   -mat_no_unroll - uses code that does not unroll the loops in the
3592                      block calculations (much slower)
3593 .    -mat_block_size - size of the blocks to use
3594 
3595    Level: intermediate
3596 
3597    Notes:
3598    The number of rows and columns must be divisible by blocksize.
3599 
3600    If the nnz parameter is given then the nz parameter is ignored
3601 
3602    A nonzero block is any block that as 1 or more nonzeros in it
3603 
3604    The block AIJ format is fully compatible with standard Fortran 77
3605    storage.  That is, the stored row and column indices can begin at
3606    either one (as in Fortran) or zero.  See the users' manual for details.
3607 
3608    Specify the preallocated storage with either nz or nnz (not both).
3609    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3610    allocation.  For additional details, see the users manual chapter on
3611    matrices.
3612 
3613 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3614 @*/
3615 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3616 {
3617   PetscErrorCode ierr;
3618 
3619   PetscFunctionBegin;
3620   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3621   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3622   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3623   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3624   PetscFunctionReturn(0);
3625 }
3626 
3627 #undef __FUNCT__
3628 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3629 /*@C
3630    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3631    per row in the matrix. For good matrix assembly performance the
3632    user should preallocate the matrix storage by setting the parameter nz
3633    (or the array nnz).  By setting these parameters accurately, performance
3634    during matrix assembly can be increased by more than a factor of 50.
3635 
3636    Collective on MPI_Comm
3637 
3638    Input Parameters:
3639 +  A - the matrix
3640 .  bs - size of block
3641 .  nz - number of block nonzeros per block row (same for all rows)
3642 -  nnz - array containing the number of block nonzeros in the various block rows
3643          (possibly different for each block row) or PETSC_NULL
3644 
3645    Options Database Keys:
3646 .   -mat_no_unroll - uses code that does not unroll the loops in the
3647                      block calculations (much slower)
3648 .    -mat_block_size - size of the blocks to use
3649 
3650    Level: intermediate
3651 
3652    Notes:
3653    If the nnz parameter is given then the nz parameter is ignored
3654 
3655    You can call MatGetInfo() to get information on how effective the preallocation was;
3656    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3657    You can also run with the option -info and look for messages with the string
3658    malloc in them to see if additional memory allocation was needed.
3659 
3660    The block AIJ format is fully compatible with standard Fortran 77
3661    storage.  That is, the stored row and column indices can begin at
3662    either one (as in Fortran) or zero.  See the users' manual for details.
3663 
3664    Specify the preallocated storage with either nz or nnz (not both).
3665    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3666    allocation.  For additional details, see the users manual chapter on
3667    matrices.
3668 
3669 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3670 @*/
3671 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3672 {
3673   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3674 
3675   PetscFunctionBegin;
3676   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3677   if (f) {
3678     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3679   }
3680   PetscFunctionReturn(0);
3681 }
3682 
3683 #undef __FUNCT__
3684 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3685 /*@C
3686    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3687    (the default sequential PETSc format).
3688 
3689    Collective on MPI_Comm
3690 
3691    Input Parameters:
3692 +  A - the matrix
3693 .  i - the indices into j for the start of each local row (starts with zero)
3694 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3695 -  v - optional values in the matrix
3696 
3697    Level: developer
3698 
3699 .keywords: matrix, aij, compressed row, sparse
3700 
3701 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3702 @*/
3703 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3704 {
3705   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3706 
3707   PetscFunctionBegin;
3708   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3709   if (f) {
3710     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3711   }
3712   PetscFunctionReturn(0);
3713 }
3714 
3715 
3716 #undef __FUNCT__
3717 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3718 /*@
3719      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3720               (upper triangular entries in CSR format) provided by the user.
3721 
3722      Collective on MPI_Comm
3723 
3724    Input Parameters:
3725 +  comm - must be an MPI communicator of size 1
3726 .  bs - size of block
3727 .  m - number of rows
3728 .  n - number of columns
3729 .  i - row indices
3730 .  j - column indices
3731 -  a - matrix values
3732 
3733    Output Parameter:
3734 .  mat - the matrix
3735 
3736    Level: intermediate
3737 
3738    Notes:
3739        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3740     once the matrix is destroyed
3741 
3742        You cannot set new nonzero locations into this matrix, that will generate an error.
3743 
3744        The i and j indices are 0 based
3745 
3746 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3747 
3748 @*/
3749 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3750 {
3751   PetscErrorCode ierr;
3752   PetscInt       ii;
3753   Mat_SeqBAIJ    *baij;
3754 
3755   PetscFunctionBegin;
3756   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3757   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3758 
3759   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3760   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3761   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3762   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3763   baij = (Mat_SeqBAIJ*)(*mat)->data;
3764   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3765   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3766 
3767   baij->i = i;
3768   baij->j = j;
3769   baij->a = a;
3770   baij->singlemalloc = PETSC_FALSE;
3771   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3772   baij->free_a       = PETSC_FALSE;
3773   baij->free_ij       = PETSC_FALSE;
3774 
3775   for (ii=0; ii<m; ii++) {
3776     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3777 #if defined(PETSC_USE_DEBUG)
3778     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3779 #endif
3780   }
3781 #if defined(PETSC_USE_DEBUG)
3782   for (ii=0; ii<baij->i[m]; ii++) {
3783     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3784     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3785   }
3786 #endif
3787 
3788   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3789   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790   PetscFunctionReturn(0);
3791 }
3792