xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1784c0f5d0adeb0f1936b3cf989e0bf52e358085)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 1:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 1*diag_offset[i];
68         diag[0]  = odiag[0];
69         mdiag[0] = odiag[0];
70         diag[0]  = 1.0 / (diag[0] + shift);
71         diag    += 1;
72         mdiag   += 1;
73       }
74       break;
75     case 2:
76       for (i=0; i<mbs; i++) {
77         odiag   = v + 4*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
81 	diag    += 4;
82 	mdiag   += 4;
83       }
84       break;
85     case 3:
86       for (i=0; i<mbs; i++) {
87         odiag    = v + 9*diag_offset[i];
88         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
89         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
90         diag[8]  = odiag[8];
91         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
92         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
93         mdiag[8] = odiag[8];
94 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
95 	diag    += 9;
96 	mdiag   += 9;
97       }
98       break;
99     case 4:
100       for (i=0; i<mbs; i++) {
101         odiag  = v + 16*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
105 	diag  += 16;
106 	mdiag += 16;
107       }
108       break;
109     case 5:
110       for (i=0; i<mbs; i++) {
111         odiag = v + 25*diag_offset[i];
112         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
114 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
115 	diag  += 25;
116 	mdiag += 25;
117       }
118       break;
119     case 6:
120       for (i=0; i<mbs; i++) {
121         odiag = v + 36*diag_offset[i];
122         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
124 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
125 	diag  += 36;
126 	mdiag += 36;
127       }
128       break;
129     default:
130       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 EXTERN_C_END
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
139 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
140 {
141   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
142   PetscScalar        *x,x1,s1;
143   const PetscScalar  *b;
144   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
145   PetscErrorCode     ierr;
146   PetscInt           m = a->mbs,i,i2,nz,idx;
147   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
148 
149   PetscFunctionBegin;
150   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
151   its = its*lits;
152   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
153   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
154   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
155   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
156   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
157 
158   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
159 
160   diag  = a->diag;
161   idiag = a->idiag;
162   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
163   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
164 
165   if (flag & SOR_ZERO_INITIAL_GUESS) {
166     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
167       x[0] = b[0]*idiag[0];
168       i2     = 1;
169       idiag += 1;
170       for (i=1; i<m; i++) {
171         v     = aa + ai[i];
172         vi    = aj + ai[i];
173         nz    = diag[i] - ai[i];
174         s1    = b[i2];
175         while (nz--) {
176           idx  = (*vi++);
177           x1   = x[idx];
178           s1  -= v[0]*x1;
179           v   += 1;
180         }
181         x[i2]   = idiag[0]*s1;
182         idiag   += 1;
183         i2      += 1;
184       }
185       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
186       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
187     }
188     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
189         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
190       i2    = 0;
191       mdiag = a->idiag+a->mbs;
192       for (i=0; i<m; i++) {
193         x1      = x[i2];
194         x[i2]   = mdiag[0]*x1;
195         mdiag  += 1;
196         i2     += 1;
197       }
198       ierr = PetscLogFlops(m);CHKERRQ(ierr);
199     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
200       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
201     }
202     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
203       idiag   = a->idiag+a->mbs - 1;
204       i2      = m - 1;
205       x1      = x[i2];
206       x[i2]   = idiag[0]*x1;
207       idiag -= 1;
208       i2    -= 1;
209       for (i=m-2; i>=0; i--) {
210         v     = aa + (diag[i]+1);
211         vi    = aj + diag[i] + 1;
212         nz    = ai[i+1] - diag[i] - 1;
213         s1    = x[i2];
214         while (nz--) {
215           idx  = (*vi++);
216           x1   = x[idx];
217           s1  -= v[0]*x1;
218           v   += 1;
219         }
220         x[i2]   = idiag[0]*s1;
221         idiag   -= 1;
222         i2      -= 1;
223       }
224       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
225     }
226   } else {
227     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
228   }
229   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
230   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
236 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
237 {
238   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
239   PetscScalar        *x,x1,x2,s1,s2;
240   const PetscScalar  *b;
241   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
242   PetscErrorCode     ierr;
243   PetscInt           m = a->mbs,i,i2,nz,idx;
244   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
245 
246   PetscFunctionBegin;
247   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
248   its = its*lits;
249   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
250   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
251   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
252   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
253   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
254 
255   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
256 
257   diag  = a->diag;
258   idiag = a->idiag;
259   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
260   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
261 
262   if (flag & SOR_ZERO_INITIAL_GUESS) {
263     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
264       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
265       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
266       i2     = 2;
267       idiag += 4;
268       for (i=1; i<m; i++) {
269 	v     = aa + 4*ai[i];
270 	vi    = aj + ai[i];
271 	nz    = diag[i] - ai[i];
272 	s1    = b[i2]; s2 = b[i2+1];
273 	while (nz--) {
274 	  idx  = 2*(*vi++);
275 	  x1   = x[idx]; x2 = x[1+idx];
276 	  s1  -= v[0]*x1 + v[2]*x2;
277 	  s2  -= v[1]*x1 + v[3]*x2;
278 	  v   += 4;
279 	}
280 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
281 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
282         idiag   += 4;
283         i2      += 2;
284       }
285       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
286       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
287     }
288     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
289         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
290       i2    = 0;
291       mdiag = a->idiag+4*a->mbs;
292       for (i=0; i<m; i++) {
293         x1      = x[i2]; x2 = x[i2+1];
294         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
295         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
296         mdiag  += 4;
297         i2     += 2;
298       }
299       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
300     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
301       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
302     }
303     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
304       idiag   = a->idiag+4*a->mbs - 4;
305       i2      = 2*m - 2;
306       x1      = x[i2]; x2 = x[i2+1];
307       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
308       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
309       idiag -= 4;
310       i2    -= 2;
311       for (i=m-2; i>=0; i--) {
312 	v     = aa + 4*(diag[i]+1);
313 	vi    = aj + diag[i] + 1;
314 	nz    = ai[i+1] - diag[i] - 1;
315 	s1    = x[i2]; s2 = x[i2+1];
316 	while (nz--) {
317 	  idx  = 2*(*vi++);
318 	  x1   = x[idx]; x2 = x[1+idx];
319 	  s1  -= v[0]*x1 + v[2]*x2;
320 	  s2  -= v[1]*x1 + v[3]*x2;
321 	  v   += 4;
322 	}
323 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
324 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
325         idiag   -= 4;
326         i2      -= 2;
327       }
328       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
329     }
330   } else {
331     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
332   }
333   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
334   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 #undef __FUNCT__
339 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
340 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
341 {
342   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
343   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
344   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
345   const PetscScalar  *b;
346   PetscErrorCode     ierr;
347   PetscInt           m = a->mbs,i,i2,nz,idx;
348   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
349 
350   PetscFunctionBegin;
351   its = its*lits;
352   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
353   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
354   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
355   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
356   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
357   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
358 
359   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
360 
361   diag  = a->diag;
362   idiag = a->idiag;
363   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
364   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
365 
366   if (flag & SOR_ZERO_INITIAL_GUESS) {
367     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
368       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
369       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
370       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
371       i2     = 3;
372       idiag += 9;
373       for (i=1; i<m; i++) {
374         v     = aa + 9*ai[i];
375         vi    = aj + ai[i];
376         nz    = diag[i] - ai[i];
377         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
378         while (nz--) {
379           idx  = 3*(*vi++);
380           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
381           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
382           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
383           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
384           v   += 9;
385         }
386         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
387         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
388         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
389         idiag   += 9;
390         i2      += 3;
391       }
392       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
393       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
394     }
395     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
396         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
397       i2    = 0;
398       mdiag = a->idiag+9*a->mbs;
399       for (i=0; i<m; i++) {
400         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
401         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
402         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
403         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
404         mdiag  += 9;
405         i2     += 3;
406       }
407       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
408     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
409       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
410     }
411     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
412       idiag   = a->idiag+9*a->mbs - 9;
413       i2      = 3*m - 3;
414       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
415       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
416       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
417       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
418       idiag -= 9;
419       i2    -= 3;
420       for (i=m-2; i>=0; i--) {
421         v     = aa + 9*(diag[i]+1);
422         vi    = aj + diag[i] + 1;
423         nz    = ai[i+1] - diag[i] - 1;
424         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
425         while (nz--) {
426           idx  = 3*(*vi++);
427           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
428           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
429           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
430           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
431           v   += 9;
432         }
433         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
434         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
435         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
436         idiag   -= 9;
437         i2      -= 3;
438       }
439       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
440     }
441   } else {
442     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
443   }
444   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
445   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
451 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
452 {
453   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
454   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
455   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
456   const PetscScalar  *b;
457   PetscErrorCode     ierr;
458   PetscInt           m = a->mbs,i,i2,nz,idx;
459   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
460 
461   PetscFunctionBegin;
462   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
463   its = its*lits;
464   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
465   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
466   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
467   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
468   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
469 
470   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
471 
472   diag  = a->diag;
473   idiag = a->idiag;
474   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
475   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
476 
477   if (flag & SOR_ZERO_INITIAL_GUESS) {
478     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
479       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
480       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
481       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
482       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
483       i2     = 4;
484       idiag += 16;
485       for (i=1; i<m; i++) {
486 	v     = aa + 16*ai[i];
487 	vi    = aj + ai[i];
488 	nz    = diag[i] - ai[i];
489 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
490 	while (nz--) {
491 	  idx  = 4*(*vi++);
492 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
493 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
494 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
495 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
496 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
497 	  v   += 16;
498 	}
499 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
500 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
501 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
502 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
503         idiag   += 16;
504         i2      += 4;
505       }
506       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
507       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
508     }
509     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
510         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
511       i2    = 0;
512       mdiag = a->idiag+16*a->mbs;
513       for (i=0; i<m; i++) {
514         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
515         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
516         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
517         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
518         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
519         mdiag  += 16;
520         i2     += 4;
521       }
522       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
523     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
524       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
525     }
526     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
527       idiag   = a->idiag+16*a->mbs - 16;
528       i2      = 4*m - 4;
529       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
530       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
531       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
532       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
533       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
534       idiag -= 16;
535       i2    -= 4;
536       for (i=m-2; i>=0; i--) {
537 	v     = aa + 16*(diag[i]+1);
538 	vi    = aj + diag[i] + 1;
539 	nz    = ai[i+1] - diag[i] - 1;
540 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
541 	while (nz--) {
542 	  idx  = 4*(*vi++);
543 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
544 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
545 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
546 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
547 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
548 	  v   += 16;
549 	}
550 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
551 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
552 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
553 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
554         idiag   -= 16;
555         i2      -= 4;
556       }
557       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 #undef __FUNCT__
568 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
569 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
570 {
571   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
572   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
573   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
574   const PetscScalar  *b;
575   PetscErrorCode     ierr;
576   PetscInt           m = a->mbs,i,i2,nz,idx;
577   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
578 
579   PetscFunctionBegin;
580   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
581   its = its*lits;
582   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
583   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
584   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
585   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
586   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
587 
588   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
589 
590   diag  = a->diag;
591   idiag = a->idiag;
592   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
593   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
594 
595   if (flag & SOR_ZERO_INITIAL_GUESS) {
596     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
597       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
598       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
599       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
600       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
601       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
602       i2     = 5;
603       idiag += 25;
604       for (i=1; i<m; i++) {
605 	v     = aa + 25*ai[i];
606 	vi    = aj + ai[i];
607 	nz    = diag[i] - ai[i];
608 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
609 	while (nz--) {
610 	  idx  = 5*(*vi++);
611 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
612 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
613 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
614 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
615 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
616 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
617 	  v   += 25;
618 	}
619 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
620 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
621 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
622 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
623 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
624         idiag   += 25;
625         i2      += 5;
626       }
627       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
628       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
629     }
630     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
631         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
632       i2    = 0;
633       mdiag = a->idiag+25*a->mbs;
634       for (i=0; i<m; i++) {
635         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
637         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
638         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
639         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
640         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
641         mdiag  += 25;
642         i2     += 5;
643       }
644       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
645     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
646       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
647     }
648     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
649       idiag   = a->idiag+25*a->mbs - 25;
650       i2      = 5*m - 5;
651       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
652       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
653       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
654       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
655       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
656       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
657       idiag -= 25;
658       i2    -= 5;
659       for (i=m-2; i>=0; i--) {
660 	v     = aa + 25*(diag[i]+1);
661 	vi    = aj + diag[i] + 1;
662 	nz    = ai[i+1] - diag[i] - 1;
663 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
664 	while (nz--) {
665 	  idx  = 5*(*vi++);
666 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
667 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
668 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
669 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
670 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
671 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
672 	  v   += 25;
673 	}
674 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
675 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
676 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
677 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
678 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
679         idiag   -= 25;
680         i2      -= 5;
681       }
682       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
683     }
684   } else {
685     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
686   }
687   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
688   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
694 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
695 {
696   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
697   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
698   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
699   const PetscScalar  *b;
700   PetscErrorCode     ierr;
701   PetscInt           m = a->mbs,i,i2,nz,idx;
702   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
703 
704   PetscFunctionBegin;
705   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
706   its = its*lits;
707   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
708   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
709   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
710   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
711   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
712 
713   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
714 
715   diag  = a->diag;
716   idiag = a->idiag;
717   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
718   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
719 
720   if (flag & SOR_ZERO_INITIAL_GUESS) {
721     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
722       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
723       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
724       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
725       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
726       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
727       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
728       i2     = 6;
729       idiag += 36;
730       for (i=1; i<m; i++) {
731         v     = aa + 36*ai[i];
732         vi    = aj + ai[i];
733         nz    = diag[i] - ai[i];
734         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
735         while (nz--) {
736           idx  = 6*(*vi++);
737           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
738           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
739           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
740           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
741           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
742           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
743           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
744           v   += 36;
745         }
746         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
747         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
748         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
749         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
750         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
751         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
752         idiag   += 36;
753         i2      += 6;
754       }
755       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
756       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
757     }
758     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
759         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
760       i2    = 0;
761       mdiag = a->idiag+36*a->mbs;
762       for (i=0; i<m; i++) {
763         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
764         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
765         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
766         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
767         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
768         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
769         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
770         mdiag  += 36;
771         i2     += 6;
772       }
773       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
774     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
775       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
776     }
777     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
778       idiag   = a->idiag+36*a->mbs - 36;
779       i2      = 6*m - 6;
780       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
781       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
782       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
783       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
784       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
785       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
786       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
787       idiag -= 36;
788       i2    -= 6;
789       for (i=m-2; i>=0; i--) {
790         v     = aa + 36*(diag[i]+1);
791         vi    = aj + diag[i] + 1;
792         nz    = ai[i+1] - diag[i] - 1;
793         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
794         while (nz--) {
795           idx  = 6*(*vi++);
796           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
797           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
798           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
799           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
800           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
801           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
802           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
803           v   += 36;
804         }
805         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
806         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
807         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
808         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
809         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
810         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
811         idiag   -= 36;
812         i2      -= 6;
813       }
814       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
815     }
816   } else {
817     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
818   }
819   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
820   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
826 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
827 {
828   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
829   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
830   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
831   const PetscScalar  *b;
832   PetscErrorCode     ierr;
833   PetscInt           m = a->mbs,i,i2,nz,idx;
834   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
835 
836   PetscFunctionBegin;
837   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
838   its = its*lits;
839   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
840   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
841   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
842   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
843   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
844 
845   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
846 
847   diag  = a->diag;
848   idiag = a->idiag;
849   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
850   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
851 
852   if (flag & SOR_ZERO_INITIAL_GUESS) {
853     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
854       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
855       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
856       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
857       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
858       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
859       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
860       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
861       i2     = 7;
862       idiag += 49;
863       for (i=1; i<m; i++) {
864         v     = aa + 49*ai[i];
865         vi    = aj + ai[i];
866         nz    = diag[i] - ai[i];
867         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
868         while (nz--) {
869           idx  = 7*(*vi++);
870           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
871           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
872           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
873           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
874           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
875           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
876           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
877           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
878           v   += 49;
879         }
880         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
881         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
882         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
883         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
884         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
885         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
886         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
887         idiag   += 49;
888         i2      += 7;
889       }
890       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
891       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
892     }
893     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
894         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
895       i2    = 0;
896       mdiag = a->idiag+49*a->mbs;
897       for (i=0; i<m; i++) {
898         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
899         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
900         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
901         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
902         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
903         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
904         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
905         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
906         mdiag  += 36;
907         i2     += 6;
908       }
909       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
910     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
911       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
912     }
913     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
914       idiag   = a->idiag+49*a->mbs - 49;
915       i2      = 7*m - 7;
916       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
917       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
918       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
919       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
920       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
921       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
922       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
923       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
924       idiag -= 49;
925       i2    -= 7;
926       for (i=m-2; i>=0; i--) {
927         v     = aa + 49*(diag[i]+1);
928         vi    = aj + diag[i] + 1;
929         nz    = ai[i+1] - diag[i] - 1;
930         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
931         while (nz--) {
932           idx  = 7*(*vi++);
933           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
934           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
935           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
936           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
937           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
938           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
939           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
940           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
941           v   += 49;
942         }
943         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
944         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
945         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
946         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
947         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
948         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
949         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
950         idiag   -= 49;
951         i2      -= 7;
952       }
953       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
954     }
955   } else {
956     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
957   }
958   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
959   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 /*
964     Special version for direct calls from Fortran (Used in PETSc-fun3d)
965 */
966 #if defined(PETSC_HAVE_FORTRAN_CAPS)
967 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
969 #define matsetvaluesblocked4_ matsetvaluesblocked4
970 #endif
971 
972 EXTERN_C_BEGIN
973 #undef __FUNCT__
974 #define __FUNCT__ "matsetvaluesblocked4_"
975 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
976 {
977   Mat               A = *AA;
978   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
979   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
980   PetscInt          *ai=a->i,*ailen=a->ilen;
981   PetscInt          *aj=a->j,stepval,lastcol = -1;
982   const PetscScalar *value = v;
983   MatScalar         *ap,*aa = a->a,*bap;
984 
985   PetscFunctionBegin;
986   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
987   stepval = (n-1)*4;
988   for (k=0; k<m; k++) { /* loop over added rows */
989     row  = im[k];
990     rp   = aj + ai[row];
991     ap   = aa + 16*ai[row];
992     nrow = ailen[row];
993     low  = 0;
994     high = nrow;
995     for (l=0; l<n; l++) { /* loop over added columns */
996       col = in[l];
997       if (col <= lastcol) low = 0; else high = nrow;
998       lastcol = col;
999       value = v + k*(stepval+4 + l)*4;
1000       while (high-low > 7) {
1001         t = (low+high)/2;
1002         if (rp[t] > col) high = t;
1003         else             low  = t;
1004       }
1005       for (i=low; i<high; i++) {
1006         if (rp[i] > col) break;
1007         if (rp[i] == col) {
1008           bap  = ap +  16*i;
1009           for (ii=0; ii<4; ii++,value+=stepval) {
1010             for (jj=ii; jj<16; jj+=4) {
1011               bap[jj] += *value++;
1012             }
1013           }
1014           goto noinsert2;
1015         }
1016       }
1017       N = nrow++ - 1;
1018       high++; /* added new column index thus must search to one higher than before */
1019       /* shift up all the later entries in this row */
1020       for (ii=N; ii>=i; ii--) {
1021         rp[ii+1] = rp[ii];
1022         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1023       }
1024       if (N >= i) {
1025         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1026       }
1027       rp[i] = col;
1028       bap   = ap +  16*i;
1029       for (ii=0; ii<4; ii++,value+=stepval) {
1030         for (jj=ii; jj<16; jj+=4) {
1031           bap[jj] = *value++;
1032         }
1033       }
1034       noinsert2:;
1035       low = i;
1036     }
1037     ailen[row] = nrow;
1038   }
1039   PetscFunctionReturnVoid();
1040 }
1041 EXTERN_C_END
1042 
1043 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1044 #define matsetvalues4_ MATSETVALUES4
1045 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1046 #define matsetvalues4_ matsetvalues4
1047 #endif
1048 
1049 EXTERN_C_BEGIN
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatSetValues4_"
1052 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1053 {
1054   Mat         A = *AA;
1055   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1056   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1057   PetscInt    *ai=a->i,*ailen=a->ilen;
1058   PetscInt    *aj=a->j,brow,bcol;
1059   PetscInt    ridx,cidx,lastcol = -1;
1060   MatScalar   *ap,value,*aa=a->a,*bap;
1061 
1062   PetscFunctionBegin;
1063   for (k=0; k<m; k++) { /* loop over added rows */
1064     row  = im[k]; brow = row/4;
1065     rp   = aj + ai[brow];
1066     ap   = aa + 16*ai[brow];
1067     nrow = ailen[brow];
1068     low  = 0;
1069     high = nrow;
1070     for (l=0; l<n; l++) { /* loop over added columns */
1071       col = in[l]; bcol = col/4;
1072       ridx = row % 4; cidx = col % 4;
1073       value = v[l + k*n];
1074       if (col <= lastcol) low = 0; else high = nrow;
1075       lastcol = col;
1076       while (high-low > 7) {
1077         t = (low+high)/2;
1078         if (rp[t] > bcol) high = t;
1079         else              low  = t;
1080       }
1081       for (i=low; i<high; i++) {
1082         if (rp[i] > bcol) break;
1083         if (rp[i] == bcol) {
1084           bap  = ap +  16*i + 4*cidx + ridx;
1085           *bap += value;
1086           goto noinsert1;
1087         }
1088       }
1089       N = nrow++ - 1;
1090       high++; /* added new column thus must search to one higher than before */
1091       /* shift up all the later entries in this row */
1092       for (ii=N; ii>=i; ii--) {
1093         rp[ii+1] = rp[ii];
1094         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1095       }
1096       if (N>=i) {
1097         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1098       }
1099       rp[i]                    = bcol;
1100       ap[16*i + 4*cidx + ridx] = value;
1101       noinsert1:;
1102       low = i;
1103     }
1104     ailen[brow] = nrow;
1105   }
1106   PetscFunctionReturnVoid();
1107 }
1108 EXTERN_C_END
1109 
1110 /*
1111      Checks for missing diagonals
1112 */
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1115 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1116 {
1117   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1118   PetscErrorCode ierr;
1119   PetscInt       *diag,*jj = a->j,i;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1123   *missing = PETSC_FALSE;
1124   if (A->rmap->n > 0 && !jj) {
1125     *missing  = PETSC_TRUE;
1126     if (d) *d = 0;
1127     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1128   } else {
1129     diag     = a->diag;
1130     for (i=0; i<a->mbs; i++) {
1131       if (jj[diag[i]] != i) {
1132         *missing  = PETSC_TRUE;
1133         if (d) *d = i;
1134         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1135       }
1136     }
1137   }
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1143 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,m = a->mbs;
1148 
1149   PetscFunctionBegin;
1150   if (!a->diag) {
1151     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1152     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1153     a->free_diag = PETSC_TRUE;
1154   }
1155   for (i=0; i<m; i++) {
1156     a->diag[i] = a->i[i+1];
1157     for (j=a->i[i]; j<a->i[i+1]; j++) {
1158       if (a->j[j] == i) {
1159         a->diag[i] = j;
1160         break;
1161       }
1162     }
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 
1168 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1172 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1173 {
1174   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1175   PetscErrorCode ierr;
1176   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1177   PetscInt       *tia, *tja;
1178 
1179   PetscFunctionBegin;
1180   *nn = n;
1181   if (!ia) PetscFunctionReturn(0);
1182   if (symmetric) {
1183     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1184   } else {
1185     tia = a->i; tja = a->j;
1186   }
1187 
1188   if (!blockcompressed && bs > 1) {
1189     (*nn) *= bs;
1190     nbs    = bs;
1191     /* malloc & create the natural set of indices */
1192     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1193     if (n) {
1194       (*ia)[0] = 0;
1195       for (j=1; j<bs; j++) {
1196         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1197       }
1198     }
1199 
1200     for (i=1; i<n; i++) {
1201       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1202       for (j=1; j<bs; j++) {
1203         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1204       }
1205     }
1206     if (n) {
1207       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1208     }
1209 
1210     if (ja) {
1211       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1212       cnt = 0;
1213       for (i=0; i<n; i++) {
1214         for (j=0; j<bs; j++) {
1215           for (k=tia[i]; k<tia[i+1]; k++) {
1216             for (l=0; l<bs; l++) {
1217               (*ja)[cnt++] = bs*tja[k] + l;
1218 	    }
1219           }
1220         }
1221       }
1222     }
1223 
1224     n     *= bs;
1225     nz *= bs*bs;
1226     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1227       ierr = PetscFree(tia);CHKERRQ(ierr);
1228       ierr = PetscFree(tja);CHKERRQ(ierr);
1229     }
1230   } else if (oshift == 1) {
1231     if (symmetric) {
1232       PetscInt nz = tia[A->rmap->n/bs];
1233       /*  add 1 to i and j indices */
1234       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1235       *ia = tia;
1236       if (ja) {
1237 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1238         *ja = tja;
1239       }
1240     } else {
1241       PetscInt nz = a->i[A->rmap->n/bs];
1242       /* malloc space and  add 1 to i and j indices */
1243       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1244       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1245       if (ja) {
1246 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1247 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1248       }
1249     }
1250   } else {
1251     *ia = tia;
1252     if (ja) *ja = tja;
1253   }
1254 
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1260 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1261 {
1262   PetscErrorCode ierr;
1263 
1264   PetscFunctionBegin;
1265   if (!ia) PetscFunctionReturn(0);
1266   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1267     ierr = PetscFree(*ia);CHKERRQ(ierr);
1268     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1269   }
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1275 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1276 {
1277   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281 #if defined(PETSC_USE_LOG)
1282   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1283 #endif
1284   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1285   if (a->row) {
1286     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1287   }
1288   if (a->col) {
1289     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1290   }
1291   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1292   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1293   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1294   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1295   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1296   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1297   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1298   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1299   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1300 
1301   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1302   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1303   ierr = PetscFree(a);CHKERRQ(ierr);
1304 
1305   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1319 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1320 {
1321   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323 
1324   PetscFunctionBegin;
1325   switch (op) {
1326   case MAT_ROW_ORIENTED:
1327     a->roworiented    = flg;
1328     break;
1329   case MAT_KEEP_NONZERO_PATTERN:
1330     a->keepnonzeropattern = flg;
1331     break;
1332   case MAT_NEW_NONZERO_LOCATIONS:
1333     a->nonew          = (flg ? 0 : 1);
1334     break;
1335   case MAT_NEW_NONZERO_LOCATION_ERR:
1336     a->nonew          = (flg ? -1 : 0);
1337     break;
1338   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1339     a->nonew          = (flg ? -2 : 0);
1340     break;
1341   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1342     a->nounused       = (flg ? -1 : 0);
1343     break;
1344   case MAT_NEW_DIAGONALS:
1345   case MAT_IGNORE_OFF_PROC_ENTRIES:
1346   case MAT_USE_HASH_TABLE:
1347     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1348     break;
1349   case MAT_SYMMETRIC:
1350   case MAT_STRUCTURALLY_SYMMETRIC:
1351   case MAT_HERMITIAN:
1352   case MAT_SYMMETRY_ETERNAL:
1353     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1354     break;
1355   default:
1356     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1357   }
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1363 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1364 {
1365   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1366   PetscErrorCode ierr;
1367   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1368   MatScalar      *aa,*aa_i;
1369   PetscScalar    *v_i;
1370 
1371   PetscFunctionBegin;
1372   bs  = A->rmap->bs;
1373   ai  = a->i;
1374   aj  = a->j;
1375   aa  = a->a;
1376   bs2 = a->bs2;
1377 
1378   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1379 
1380   bn  = row/bs;   /* Block number */
1381   bp  = row % bs; /* Block Position */
1382   M   = ai[bn+1] - ai[bn];
1383   *nz = bs*M;
1384 
1385   if (v) {
1386     *v = 0;
1387     if (*nz) {
1388       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1389       for (i=0; i<M; i++) { /* for each block in the block row */
1390         v_i  = *v + i*bs;
1391         aa_i = aa + bs2*(ai[bn] + i);
1392         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1393       }
1394     }
1395   }
1396 
1397   if (idx) {
1398     *idx = 0;
1399     if (*nz) {
1400       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1401       for (i=0; i<M; i++) { /* for each block in the block row */
1402         idx_i = *idx + i*bs;
1403         itmp  = bs*aj[ai[bn] + i];
1404         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1405       }
1406     }
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1413 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1414 {
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1419   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1425 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1426 {
1427   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1428   Mat            C;
1429   PetscErrorCode ierr;
1430   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1431   PetscInt       *rows,*cols,bs2=a->bs2;
1432   MatScalar      *array;
1433 
1434   PetscFunctionBegin;
1435   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1436   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1437     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1438     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1439 
1440     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1441     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1442     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1443     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1444     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1445     ierr = PetscFree(col);CHKERRQ(ierr);
1446   } else {
1447     C = *B;
1448   }
1449 
1450   array = a->a;
1451   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1452   cols = rows + bs;
1453   for (i=0; i<mbs; i++) {
1454     cols[0] = i*bs;
1455     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1456     len = ai[i+1] - ai[i];
1457     for (j=0; j<len; j++) {
1458       rows[0] = (*aj++)*bs;
1459       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1460       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1461       array += bs2;
1462     }
1463   }
1464   ierr = PetscFree(rows);CHKERRQ(ierr);
1465 
1466   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1467   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1468 
1469   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1470     *B = C;
1471   } else {
1472     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1479 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1480 {
1481   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1482   PetscErrorCode ierr;
1483   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1484   int            fd;
1485   PetscScalar    *aa;
1486   FILE           *file;
1487 
1488   PetscFunctionBegin;
1489   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1490   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1491   col_lens[0] = MAT_FILE_COOKIE;
1492 
1493   col_lens[1] = A->rmap->N;
1494   col_lens[2] = A->cmap->n;
1495   col_lens[3] = a->nz*bs2;
1496 
1497   /* store lengths of each row and write (including header) to file */
1498   count = 0;
1499   for (i=0; i<a->mbs; i++) {
1500     for (j=0; j<bs; j++) {
1501       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1502     }
1503   }
1504   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1505   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1506 
1507   /* store column indices (zero start index) */
1508   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1509   count = 0;
1510   for (i=0; i<a->mbs; i++) {
1511     for (j=0; j<bs; j++) {
1512       for (k=a->i[i]; k<a->i[i+1]; k++) {
1513         for (l=0; l<bs; l++) {
1514           jj[count++] = bs*a->j[k] + l;
1515         }
1516       }
1517     }
1518   }
1519   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1520   ierr = PetscFree(jj);CHKERRQ(ierr);
1521 
1522   /* store nonzero values */
1523   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1524   count = 0;
1525   for (i=0; i<a->mbs; i++) {
1526     for (j=0; j<bs; j++) {
1527       for (k=a->i[i]; k<a->i[i+1]; k++) {
1528         for (l=0; l<bs; l++) {
1529           aa[count++] = a->a[bs2*k + l*bs + j];
1530         }
1531       }
1532     }
1533   }
1534   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1535   ierr = PetscFree(aa);CHKERRQ(ierr);
1536 
1537   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1538   if (file) {
1539     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1546 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1547 {
1548   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1549   PetscErrorCode    ierr;
1550   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1551   PetscViewerFormat format;
1552 
1553   PetscFunctionBegin;
1554   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1555   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1556     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1557   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1558     Mat aij;
1559     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1560     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(aij);CHKERRQ(ierr);
1562   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1563      PetscFunctionReturn(0);
1564   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1565     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1566     for (i=0; i<a->mbs; i++) {
1567       for (j=0; j<bs; j++) {
1568         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1569         for (k=a->i[i]; k<a->i[i+1]; k++) {
1570           for (l=0; l<bs; l++) {
1571 #if defined(PETSC_USE_COMPLEX)
1572             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1574                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1575             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1576               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1577                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1578             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1579               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1580             }
1581 #else
1582             if (a->a[bs2*k + l*bs + j] != 0.0) {
1583               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1584             }
1585 #endif
1586           }
1587         }
1588         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1589       }
1590     }
1591     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1592   } else {
1593     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1594     for (i=0; i<a->mbs; i++) {
1595       for (j=0; j<bs; j++) {
1596         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1597         for (k=a->i[i]; k<a->i[i+1]; k++) {
1598           for (l=0; l<bs; l++) {
1599 #if defined(PETSC_USE_COMPLEX)
1600             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1602                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1603             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1604               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1605                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1606             } else {
1607               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1608             }
1609 #else
1610             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1611 #endif
1612           }
1613         }
1614         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1615       }
1616     }
1617     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1618   }
1619   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1625 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1626 {
1627   Mat            A = (Mat) Aa;
1628   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1629   PetscErrorCode ierr;
1630   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1631   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1632   MatScalar      *aa;
1633   PetscViewer    viewer;
1634 
1635   PetscFunctionBegin;
1636 
1637   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1638   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1639 
1640   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1641 
1642   /* loop over matrix elements drawing boxes */
1643   color = PETSC_DRAW_BLUE;
1644   for (i=0,row=0; i<mbs; i++,row+=bs) {
1645     for (j=a->i[i]; j<a->i[i+1]; j++) {
1646       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1647       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1648       aa = a->a + j*bs2;
1649       for (k=0; k<bs; k++) {
1650         for (l=0; l<bs; l++) {
1651           if (PetscRealPart(*aa++) >=  0.) continue;
1652           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1653         }
1654       }
1655     }
1656   }
1657   color = PETSC_DRAW_CYAN;
1658   for (i=0,row=0; i<mbs; i++,row+=bs) {
1659     for (j=a->i[i]; j<a->i[i+1]; j++) {
1660       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1661       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1662       aa = a->a + j*bs2;
1663       for (k=0; k<bs; k++) {
1664         for (l=0; l<bs; l++) {
1665           if (PetscRealPart(*aa++) != 0.) continue;
1666           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1667         }
1668       }
1669     }
1670   }
1671 
1672   color = PETSC_DRAW_RED;
1673   for (i=0,row=0; i<mbs; i++,row+=bs) {
1674     for (j=a->i[i]; j<a->i[i+1]; j++) {
1675       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1676       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1677       aa = a->a + j*bs2;
1678       for (k=0; k<bs; k++) {
1679         for (l=0; l<bs; l++) {
1680           if (PetscRealPart(*aa++) <= 0.) continue;
1681           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1682         }
1683       }
1684     }
1685   }
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1691 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1692 {
1693   PetscErrorCode ierr;
1694   PetscReal      xl,yl,xr,yr,w,h;
1695   PetscDraw      draw;
1696   PetscTruth     isnull;
1697 
1698   PetscFunctionBegin;
1699 
1700   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1701   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1702 
1703   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1704   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1705   xr += w;    yr += h;  xl = -w;     yl = -h;
1706   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1707   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1708   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatView_SeqBAIJ"
1714 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1715 {
1716   PetscErrorCode ierr;
1717   PetscTruth     iascii,isbinary,isdraw;
1718 
1719   PetscFunctionBegin;
1720   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1722   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1723   if (iascii){
1724     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1725   } else if (isbinary) {
1726     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1727   } else if (isdraw) {
1728     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1729   } else {
1730     Mat B;
1731     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1732     ierr = MatView(B,viewer);CHKERRQ(ierr);
1733     ierr = MatDestroy(B);CHKERRQ(ierr);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1741 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1742 {
1743   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1744   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1745   PetscInt    *ai = a->i,*ailen = a->ilen;
1746   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1747   MatScalar   *ap,*aa = a->a;
1748 
1749   PetscFunctionBegin;
1750   for (k=0; k<m; k++) { /* loop over rows */
1751     row  = im[k]; brow = row/bs;
1752     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1753     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1754     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1755     nrow = ailen[brow];
1756     for (l=0; l<n; l++) { /* loop over columns */
1757       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1758       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1759       col  = in[l] ;
1760       bcol = col/bs;
1761       cidx = col%bs;
1762       ridx = row%bs;
1763       high = nrow;
1764       low  = 0; /* assume unsorted */
1765       while (high-low > 5) {
1766         t = (low+high)/2;
1767         if (rp[t] > bcol) high = t;
1768         else             low  = t;
1769       }
1770       for (i=low; i<high; i++) {
1771         if (rp[i] > bcol) break;
1772         if (rp[i] == bcol) {
1773           *v++ = ap[bs2*i+bs*cidx+ridx];
1774           goto finished;
1775         }
1776       }
1777       *v++ = 0.0;
1778       finished:;
1779     }
1780   }
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #define CHUNKSIZE 10
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1787 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1788 {
1789   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1790   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1791   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1792   PetscErrorCode    ierr;
1793   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1794   PetscTruth        roworiented=a->roworiented;
1795   const PetscScalar *value = v;
1796   MatScalar         *ap,*aa = a->a,*bap;
1797 
1798   PetscFunctionBegin;
1799   if (roworiented) {
1800     stepval = (n-1)*bs;
1801   } else {
1802     stepval = (m-1)*bs;
1803   }
1804   for (k=0; k<m; k++) { /* loop over added rows */
1805     row  = im[k];
1806     if (row < 0) continue;
1807 #if defined(PETSC_USE_DEBUG)
1808     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1809 #endif
1810     rp   = aj + ai[row];
1811     ap   = aa + bs2*ai[row];
1812     rmax = imax[row];
1813     nrow = ailen[row];
1814     low  = 0;
1815     high = nrow;
1816     for (l=0; l<n; l++) { /* loop over added columns */
1817       if (in[l] < 0) continue;
1818 #if defined(PETSC_USE_DEBUG)
1819       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1820 #endif
1821       col = in[l];
1822       if (roworiented) {
1823         value = v + k*(stepval+bs)*bs + l*bs;
1824       } else {
1825         value = v + l*(stepval+bs)*bs + k*bs;
1826       }
1827       if (col <= lastcol) low = 0; else high = nrow;
1828       lastcol = col;
1829       while (high-low > 7) {
1830         t = (low+high)/2;
1831         if (rp[t] > col) high = t;
1832         else             low  = t;
1833       }
1834       for (i=low; i<high; i++) {
1835         if (rp[i] > col) break;
1836         if (rp[i] == col) {
1837           bap  = ap +  bs2*i;
1838           if (roworiented) {
1839             if (is == ADD_VALUES) {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] += *value++;
1843                 }
1844               }
1845             } else {
1846               for (ii=0; ii<bs; ii++,value+=stepval) {
1847                 for (jj=ii; jj<bs2; jj+=bs) {
1848                   bap[jj] = *value++;
1849                 }
1850               }
1851             }
1852           } else {
1853             if (is == ADD_VALUES) {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   *bap++ += *value++;
1857                 }
1858               }
1859             } else {
1860               for (ii=0; ii<bs; ii++,value+=stepval) {
1861                 for (jj=0; jj<bs; jj++) {
1862                   *bap++  = *value++;
1863                 }
1864               }
1865             }
1866           }
1867           goto noinsert2;
1868         }
1869       }
1870       if (nonew == 1) goto noinsert2;
1871       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1872       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1873       N = nrow++ - 1; high++;
1874       /* shift up all the later entries in this row */
1875       for (ii=N; ii>=i; ii--) {
1876         rp[ii+1] = rp[ii];
1877         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1878       }
1879       if (N >= i) {
1880         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1881       }
1882       rp[i] = col;
1883       bap   = ap +  bs2*i;
1884       if (roworiented) {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=ii; jj<bs2; jj+=bs) {
1887             bap[jj] = *value++;
1888           }
1889         }
1890       } else {
1891         for (ii=0; ii<bs; ii++,value+=stepval) {
1892           for (jj=0; jj<bs; jj++) {
1893             *bap++  = *value++;
1894           }
1895         }
1896       }
1897       noinsert2:;
1898       low = i;
1899     }
1900     ailen[row] = nrow;
1901   }
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1907 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1908 {
1909   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1910   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1911   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1912   PetscErrorCode ierr;
1913   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1914   MatScalar      *aa = a->a,*ap;
1915   PetscReal      ratio=0.6;
1916 
1917   PetscFunctionBegin;
1918   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1919 
1920   if (m) rmax = ailen[0];
1921   for (i=1; i<mbs; i++) {
1922     /* move each row back by the amount of empty slots (fshift) before it*/
1923     fshift += imax[i-1] - ailen[i-1];
1924     rmax   = PetscMax(rmax,ailen[i]);
1925     if (fshift) {
1926       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1927       N = ailen[i];
1928       for (j=0; j<N; j++) {
1929         ip[j-fshift] = ip[j];
1930         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1931       }
1932     }
1933     ai[i] = ai[i-1] + ailen[i-1];
1934   }
1935   if (mbs) {
1936     fshift += imax[mbs-1] - ailen[mbs-1];
1937     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1938   }
1939   /* reset ilen and imax for each row */
1940   for (i=0; i<mbs; i++) {
1941     ailen[i] = imax[i] = ai[i+1] - ai[i];
1942   }
1943   a->nz = ai[mbs];
1944 
1945   /* diagonals may have moved, so kill the diagonal pointers */
1946   a->idiagvalid = PETSC_FALSE;
1947   if (fshift && a->diag) {
1948     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1949     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1950     a->diag = 0;
1951   }
1952   if (fshift && a->nounused == -1) {
1953     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1954   }
1955   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1956   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1957   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1958   a->reallocs          = 0;
1959   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1960 
1961   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1962   if (a->compressedrow.use){
1963     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1964   }
1965 
1966   A->same_nonzero = PETSC_TRUE;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 /*
1971    This function returns an array of flags which indicate the locations of contiguous
1972    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1973    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1974    Assume: sizes should be long enough to hold all the values.
1975 */
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1978 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1979 {
1980   PetscInt   i,j,k,row;
1981   PetscTruth flg;
1982 
1983   PetscFunctionBegin;
1984   for (i=0,j=0; i<n; j++) {
1985     row = idx[i];
1986     if (row%bs!=0) { /* Not the begining of a block */
1987       sizes[j] = 1;
1988       i++;
1989     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1990       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1991       i++;
1992     } else { /* Begining of the block, so check if the complete block exists */
1993       flg = PETSC_TRUE;
1994       for (k=1; k<bs; k++) {
1995         if (row+k != idx[i+k]) { /* break in the block */
1996           flg = PETSC_FALSE;
1997           break;
1998         }
1999       }
2000       if (flg) { /* No break in the bs */
2001         sizes[j] = bs;
2002         i+= bs;
2003       } else {
2004         sizes[j] = 1;
2005         i++;
2006       }
2007     }
2008   }
2009   *bs_max = j;
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2015 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2016 {
2017   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2018   PetscErrorCode ierr;
2019   PetscInt       i,j,k,count,*rows;
2020   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2021   PetscScalar    zero = 0.0;
2022   MatScalar      *aa;
2023 
2024   PetscFunctionBegin;
2025   /* Make a copy of the IS and  sort it */
2026   /* allocate memory for rows,sizes */
2027   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2028   sizes = rows + is_n;
2029 
2030   /* copy IS values to rows, and sort them */
2031   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2032   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2033   if (baij->keepnonzeropattern) {
2034     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2035     bs_max = is_n;
2036     A->same_nonzero = PETSC_TRUE;
2037   } else {
2038     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2039     A->same_nonzero = PETSC_FALSE;
2040   }
2041 
2042   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2043     row   = rows[j];
2044     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2045     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2046     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2047     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2048       if (diag != 0.0) {
2049         if (baij->ilen[row/bs] > 0) {
2050           baij->ilen[row/bs]       = 1;
2051           baij->j[baij->i[row/bs]] = row/bs;
2052           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2053         }
2054         /* Now insert all the diagonal values for this bs */
2055         for (k=0; k<bs; k++) {
2056           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2057         }
2058       } else { /* (diag == 0.0) */
2059         baij->ilen[row/bs] = 0;
2060       } /* end (diag == 0.0) */
2061     } else { /* (sizes[i] != bs) */
2062 #if defined (PETSC_USE_DEBUG)
2063       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2064 #endif
2065       for (k=0; k<count; k++) {
2066         aa[0] =  zero;
2067         aa    += bs;
2068       }
2069       if (diag != 0.0) {
2070         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2071       }
2072     }
2073   }
2074 
2075   ierr = PetscFree(rows);CHKERRQ(ierr);
2076   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 #undef __FUNCT__
2081 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2082 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2083 {
2084   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2085   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2086   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2087   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2088   PetscErrorCode ierr;
2089   PetscInt       ridx,cidx,bs2=a->bs2;
2090   PetscTruth     roworiented=a->roworiented;
2091   MatScalar      *ap,value,*aa=a->a,*bap;
2092 
2093   PetscFunctionBegin;
2094   for (k=0; k<m; k++) { /* loop over added rows */
2095     row  = im[k];
2096     brow = row/bs;
2097     if (row < 0) continue;
2098 #if defined(PETSC_USE_DEBUG)
2099     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2100 #endif
2101     rp   = aj + ai[brow];
2102     ap   = aa + bs2*ai[brow];
2103     rmax = imax[brow];
2104     nrow = ailen[brow];
2105     low  = 0;
2106     high = nrow;
2107     for (l=0; l<n; l++) { /* loop over added columns */
2108       if (in[l] < 0) continue;
2109 #if defined(PETSC_USE_DEBUG)
2110       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2111 #endif
2112       col = in[l]; bcol = col/bs;
2113       ridx = row % bs; cidx = col % bs;
2114       if (roworiented) {
2115         value = v[l + k*n];
2116       } else {
2117         value = v[k + l*m];
2118       }
2119       if (col <= lastcol) low = 0; else high = nrow;
2120       lastcol = col;
2121       while (high-low > 7) {
2122         t = (low+high)/2;
2123         if (rp[t] > bcol) high = t;
2124         else              low  = t;
2125       }
2126       for (i=low; i<high; i++) {
2127         if (rp[i] > bcol) break;
2128         if (rp[i] == bcol) {
2129           bap  = ap +  bs2*i + bs*cidx + ridx;
2130           if (is == ADD_VALUES) *bap += value;
2131           else                  *bap  = value;
2132           goto noinsert1;
2133         }
2134       }
2135       if (nonew == 1) goto noinsert1;
2136       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2137       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2138       N = nrow++ - 1; high++;
2139       /* shift up all the later entries in this row */
2140       for (ii=N; ii>=i; ii--) {
2141         rp[ii+1] = rp[ii];
2142         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2143       }
2144       if (N>=i) {
2145         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2146       }
2147       rp[i]                      = bcol;
2148       ap[bs2*i + bs*cidx + ridx] = value;
2149       a->nz++;
2150       noinsert1:;
2151       low = i;
2152     }
2153     ailen[brow] = nrow;
2154   }
2155   A->same_nonzero = PETSC_FALSE;
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2160 
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2163 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2164 {
2165   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2166   Mat            outA;
2167   PetscErrorCode ierr;
2168   PetscTruth     row_identity,col_identity;
2169 
2170   PetscFunctionBegin;
2171   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2172   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2173   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2174   if (!row_identity || !col_identity) {
2175     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2176   }
2177 
2178   outA          = inA;
2179   inA->factor   = MAT_FACTOR_LU;
2180 
2181   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2182 
2183   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2184   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2185   a->row = row;
2186   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2187   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2188   a->col = col;
2189 
2190   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2191   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2192   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2193 
2194   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2195   if (!a->solve_work) {
2196     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2197     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2198   }
2199   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2200 
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 EXTERN_C_BEGIN
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2207 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2208 {
2209   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2210   PetscInt    i,nz,mbs;
2211 
2212   PetscFunctionBegin;
2213   nz  = baij->maxnz/baij->bs2;
2214   mbs = baij->mbs;
2215   for (i=0; i<nz; i++) {
2216     baij->j[i] = indices[i];
2217   }
2218   baij->nz = nz;
2219   for (i=0; i<mbs; i++) {
2220     baij->ilen[i] = baij->imax[i];
2221   }
2222   PetscFunctionReturn(0);
2223 }
2224 EXTERN_C_END
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2228 /*@
2229     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2230        in the matrix.
2231 
2232   Input Parameters:
2233 +  mat - the SeqBAIJ matrix
2234 -  indices - the column indices
2235 
2236   Level: advanced
2237 
2238   Notes:
2239     This can be called if you have precomputed the nonzero structure of the
2240   matrix and want to provide it to the matrix object to improve the performance
2241   of the MatSetValues() operation.
2242 
2243     You MUST have set the correct numbers of nonzeros per row in the call to
2244   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2245 
2246     MUST be called before any calls to MatSetValues();
2247 
2248 @*/
2249 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2250 {
2251   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2252 
2253   PetscFunctionBegin;
2254   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2255   PetscValidPointer(indices,2);
2256   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2257   if (f) {
2258     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2259   } else {
2260     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2261   }
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2267 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2268 {
2269   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2270   PetscErrorCode ierr;
2271   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2272   PetscReal      atmp;
2273   PetscScalar    *x,zero = 0.0;
2274   MatScalar      *aa;
2275   PetscInt       ncols,brow,krow,kcol;
2276 
2277   PetscFunctionBegin;
2278   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2279   bs   = A->rmap->bs;
2280   aa   = a->a;
2281   ai   = a->i;
2282   aj   = a->j;
2283   mbs  = a->mbs;
2284 
2285   ierr = VecSet(v,zero);CHKERRQ(ierr);
2286   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2287   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2288   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2289   for (i=0; i<mbs; i++) {
2290     ncols = ai[1] - ai[0]; ai++;
2291     brow  = bs*i;
2292     for (j=0; j<ncols; j++){
2293       for (kcol=0; kcol<bs; kcol++){
2294         for (krow=0; krow<bs; krow++){
2295           atmp = PetscAbsScalar(*aa);aa++;
2296           row = brow + krow;    /* row index */
2297           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2298           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2299         }
2300       }
2301       aj++;
2302     }
2303   }
2304   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatCopy_SeqBAIJ"
2310 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2311 {
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   /* If the two matrices have the same copy implementation, use fast copy. */
2316   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2317     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2318     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2319 
2320     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2321       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2322     }
2323     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2324   } else {
2325     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2326   }
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2332 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2333 {
2334   PetscErrorCode ierr;
2335 
2336   PetscFunctionBegin;
2337   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2343 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2344 {
2345   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2346   PetscFunctionBegin;
2347   *array = a->a;
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2353 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2354 {
2355   PetscFunctionBegin;
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #include "petscblaslapack.h"
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2362 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2363 {
2364   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2365   PetscErrorCode ierr;
2366   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2367   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2368 
2369   PetscFunctionBegin;
2370   if (str == SAME_NONZERO_PATTERN) {
2371     PetscScalar alpha = a;
2372     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2373   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2374     if (y->xtoy && y->XtoY != X) {
2375       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2376       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2377     }
2378     if (!y->xtoy) { /* get xtoy */
2379       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2380       y->XtoY = X;
2381       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2382     }
2383     bs2 = bs*bs;
2384     for (i=0; i<x->nz; i++) {
2385       j = 0;
2386       while (j < bs2){
2387         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2388         j++;
2389       }
2390     }
2391     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2392   } else {
2393     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2394   }
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2400 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2401 {
2402   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2403   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2404   MatScalar      *aa = a->a;
2405 
2406   PetscFunctionBegin;
2407   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2413 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2414 {
2415   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2416   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2417   MatScalar      *aa = a->a;
2418 
2419   PetscFunctionBegin;
2420   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2428 /*
2429     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2430 */
2431 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2432 {
2433   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2434   PetscErrorCode ierr;
2435   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2436   PetscInt       nz = a->i[m],row,*jj,mr,col;
2437 
2438   PetscFunctionBegin;
2439   *nn = n;
2440   if (!ia) PetscFunctionReturn(0);
2441   if (symmetric) {
2442     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2443   } else {
2444     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2445     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2446     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2447     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2448     jj = a->j;
2449     for (i=0; i<nz; i++) {
2450       collengths[jj[i]]++;
2451     }
2452     cia[0] = oshift;
2453     for (i=0; i<n; i++) {
2454       cia[i+1] = cia[i] + collengths[i];
2455     }
2456     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2457     jj   = a->j;
2458     for (row=0; row<m; row++) {
2459       mr = a->i[row+1] - a->i[row];
2460       for (i=0; i<mr; i++) {
2461         col = *jj++;
2462         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2463       }
2464     }
2465     ierr = PetscFree(collengths);CHKERRQ(ierr);
2466     *ia = cia; *ja = cja;
2467   }
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2473 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2474 {
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   if (!ia) PetscFunctionReturn(0);
2479   ierr = PetscFree(*ia);CHKERRQ(ierr);
2480   ierr = PetscFree(*ja);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2486 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2487 {
2488   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2489   PetscErrorCode ierr;
2490   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2491   PetscScalar    dx,*y,*xx,*w3_array;
2492   PetscScalar    *vscale_array;
2493   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2494   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2495   void           *fctx = coloring->fctx;
2496   PetscTruth     flg = PETSC_FALSE;
2497   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2498   Vec            x1_tmp;
2499 
2500   PetscFunctionBegin;
2501   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2502   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2503   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2504   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2505 
2506   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2507   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2508   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2509   if (flg) {
2510     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2511   } else {
2512     PetscTruth assembled;
2513     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2514     if (assembled) {
2515       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2516     }
2517   }
2518 
2519   x1_tmp = x1;
2520   if (!coloring->vscale){
2521     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2522   }
2523 
2524   /*
2525     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2526     coloring->F for the coarser grids from the finest
2527   */
2528   if (coloring->F) {
2529     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2530     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2531     if (m1 != m2) {
2532       coloring->F = 0;
2533       }
2534     }
2535 
2536   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2537     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2538   }
2539   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2540 
2541   /* Set w1 = F(x1) */
2542   if (coloring->F) {
2543     w1          = coloring->F; /* use already computed value of function */
2544     coloring->F = 0;
2545   } else {
2546     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2547     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2548     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2549   }
2550 
2551   if (!coloring->w3) {
2552     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2553     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2554   }
2555   w3 = coloring->w3;
2556 
2557     CHKMEMQ;
2558     /* Compute all the local scale factors, including ghost points */
2559   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2560   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2561   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2562   if (ctype == IS_COLORING_GHOSTED){
2563     col_start = 0; col_end = N;
2564   } else if (ctype == IS_COLORING_GLOBAL){
2565     xx = xx - start;
2566     vscale_array = vscale_array - start;
2567     col_start = start; col_end = N + start;
2568   }    CHKMEMQ;
2569   for (col=col_start; col<col_end; col++){
2570     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2571     if (coloring->htype[0] == 'w') {
2572       dx = 1.0 + unorm;
2573     } else {
2574       dx  = xx[col];
2575     }
2576     if (dx == 0.0) dx = 1.0;
2577 #if !defined(PETSC_USE_COMPLEX)
2578     if (dx < umin && dx >= 0.0)      dx = umin;
2579     else if (dx < 0.0 && dx > -umin) dx = -umin;
2580 #else
2581     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2582     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2583 #endif
2584     dx               *= epsilon;
2585     vscale_array[col] = 1.0/dx;
2586   }     CHKMEMQ;
2587   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2588   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2589   if (ctype == IS_COLORING_GLOBAL){
2590     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2591     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2592   }
2593         CHKMEMQ;
2594   if (coloring->vscaleforrow) {
2595     vscaleforrow = coloring->vscaleforrow;
2596   } else {
2597     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2598   }
2599 
2600 
2601   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2602   /*
2603     Loop over each color
2604   */
2605   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2606   for (k=0; k<coloring->ncolors; k++) {
2607     coloring->currentcolor = k;
2608     for (i=0; i<bs; i++) {
2609       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2610       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2611       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2612       /*
2613 	Loop over each column associated with color
2614 	adding the perturbation to the vector w3.
2615       */
2616       for (l=0; l<coloring->ncolumns[k]; l++) {
2617 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2618 	if (coloring->htype[0] == 'w') {
2619 	  dx = 1.0 + unorm;
2620 	} else {
2621 	  dx  = xx[col];
2622 	}
2623 	if (dx == 0.0) dx = 1.0;
2624 #if !defined(PETSC_USE_COMPLEX)
2625 	if (dx < umin && dx >= 0.0)      dx = umin;
2626 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2627 #else
2628 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2629 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2630 #endif
2631 	dx            *= epsilon;
2632 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2633 	w3_array[col] += dx;
2634       }
2635       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2636       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2637 
2638       /*
2639 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2640 	w2 = F(x1 + dx) - F(x1)
2641       */
2642       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2643       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2644       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2645       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2646 
2647       /*
2648 	Loop over rows of vector, putting results into Jacobian matrix
2649       */
2650       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2651       for (l=0; l<coloring->nrows[k]; l++) {
2652 	row    = bs*coloring->rows[k][l];             /* local row index */
2653 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2654         for (j=0; j<bs; j++) {
2655   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2656           srows[j]  = row + start + j;
2657         }
2658 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2659       }
2660       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2661     }
2662   } /* endof for each color */
2663   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2664   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2665   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2666   ierr = PetscFree(srows);CHKERRQ(ierr);
2667 
2668   coloring->currentcolor = -1;
2669   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2670   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2671   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 /* -------------------------------------------------------------------*/
2676 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2677        MatGetRow_SeqBAIJ,
2678        MatRestoreRow_SeqBAIJ,
2679        MatMult_SeqBAIJ_N,
2680 /* 4*/ MatMultAdd_SeqBAIJ_N,
2681        MatMultTranspose_SeqBAIJ,
2682        MatMultTransposeAdd_SeqBAIJ,
2683        0,
2684        0,
2685        0,
2686 /*10*/ 0,
2687        MatLUFactor_SeqBAIJ,
2688        0,
2689        0,
2690        MatTranspose_SeqBAIJ,
2691 /*15*/ MatGetInfo_SeqBAIJ,
2692        MatEqual_SeqBAIJ,
2693        MatGetDiagonal_SeqBAIJ,
2694        MatDiagonalScale_SeqBAIJ,
2695        MatNorm_SeqBAIJ,
2696 /*20*/ 0,
2697        MatAssemblyEnd_SeqBAIJ,
2698        MatSetOption_SeqBAIJ,
2699        MatZeroEntries_SeqBAIJ,
2700 /*24*/ MatZeroRows_SeqBAIJ,
2701        0,
2702        0,
2703        0,
2704        0,
2705 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2706        0,
2707        0,
2708        MatGetArray_SeqBAIJ,
2709        MatRestoreArray_SeqBAIJ,
2710 /*34*/ MatDuplicate_SeqBAIJ,
2711        0,
2712        0,
2713        MatILUFactor_SeqBAIJ,
2714        0,
2715 /*39*/ MatAXPY_SeqBAIJ,
2716        MatGetSubMatrices_SeqBAIJ,
2717        MatIncreaseOverlap_SeqBAIJ,
2718        MatGetValues_SeqBAIJ,
2719        MatCopy_SeqBAIJ,
2720 /*44*/ 0,
2721        MatScale_SeqBAIJ,
2722        0,
2723        0,
2724        MatILUDTFactor_SeqBAIJ,
2725 /*49*/ 0,
2726        MatGetRowIJ_SeqBAIJ,
2727        MatRestoreRowIJ_SeqBAIJ,
2728        MatGetColumnIJ_SeqBAIJ,
2729        MatRestoreColumnIJ_SeqBAIJ,
2730 /*54*/ MatFDColoringCreate_SeqAIJ,
2731        0,
2732        0,
2733        0,
2734        MatSetValuesBlocked_SeqBAIJ,
2735 /*59*/ MatGetSubMatrix_SeqBAIJ,
2736        MatDestroy_SeqBAIJ,
2737        MatView_SeqBAIJ,
2738        0,
2739        0,
2740 /*64*/ 0,
2741        0,
2742        0,
2743        0,
2744        0,
2745 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2746        0,
2747        MatConvert_Basic,
2748        0,
2749        0,
2750 /*74*/ 0,
2751        MatFDColoringApply_BAIJ,
2752        0,
2753        0,
2754        0,
2755 /*79*/ 0,
2756        0,
2757        0,
2758        0,
2759        MatLoad_SeqBAIJ,
2760 /*84*/ 0,
2761        0,
2762        0,
2763        0,
2764        0,
2765 /*89*/ 0,
2766        0,
2767        0,
2768        0,
2769        0,
2770 /*94*/ 0,
2771        0,
2772        0,
2773        0,
2774        0,
2775 /*99*/0,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*104*/0,
2781        MatRealPart_SeqBAIJ,
2782        MatImaginaryPart_SeqBAIJ,
2783        0,
2784        0,
2785 /*109*/0,
2786        0,
2787        0,
2788        0,
2789        MatMissingDiagonal_SeqBAIJ
2790 /*114*/
2791 };
2792 
2793 EXTERN_C_BEGIN
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2796 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2797 {
2798   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2799   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2800   PetscErrorCode ierr;
2801 
2802   PetscFunctionBegin;
2803   if (aij->nonew != 1) {
2804     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2805   }
2806 
2807   /* allocate space for values if not already there */
2808   if (!aij->saved_values) {
2809     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2810     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2811   }
2812 
2813   /* copy values over */
2814   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2815   PetscFunctionReturn(0);
2816 }
2817 EXTERN_C_END
2818 
2819 EXTERN_C_BEGIN
2820 #undef __FUNCT__
2821 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2822 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2823 {
2824   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2825   PetscErrorCode ierr;
2826   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2827 
2828   PetscFunctionBegin;
2829   if (aij->nonew != 1) {
2830     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2831   }
2832   if (!aij->saved_values) {
2833     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2834   }
2835 
2836   /* copy values over */
2837   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840 EXTERN_C_END
2841 
2842 EXTERN_C_BEGIN
2843 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2844 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2845 EXTERN_C_END
2846 
2847 EXTERN_C_BEGIN
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2850 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2851 {
2852   Mat_SeqBAIJ    *b;
2853   PetscErrorCode ierr;
2854   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2855   PetscTruth     flg,skipallocation = PETSC_FALSE;
2856 
2857   PetscFunctionBegin;
2858 
2859   if (nz == MAT_SKIP_ALLOCATION) {
2860     skipallocation = PETSC_TRUE;
2861     nz             = 0;
2862   }
2863 
2864   if (bs < 0) {
2865     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2866       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2867     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2868     bs   = PetscAbs(bs);
2869   }
2870   if (nnz && newbs != bs) {
2871     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2872   }
2873   bs   = newbs;
2874 
2875   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2876   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2877   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2878   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2879 
2880   B->preallocated = PETSC_TRUE;
2881 
2882   mbs  = B->rmap->n/bs;
2883   nbs  = B->cmap->n/bs;
2884   bs2  = bs*bs;
2885 
2886   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2887     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2888   }
2889 
2890   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2891   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2892   if (nnz) {
2893     for (i=0; i<mbs; i++) {
2894       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2895       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2896     }
2897   }
2898 
2899   b       = (Mat_SeqBAIJ*)B->data;
2900   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2901     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2902   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2903 
2904   if (!flg) {
2905     switch (bs) {
2906     case 1:
2907       B->ops->mult            = MatMult_SeqBAIJ_1;
2908       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2909       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2910       break;
2911     case 2:
2912       B->ops->mult            = MatMult_SeqBAIJ_2;
2913       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2914       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2915       break;
2916     case 3:
2917       B->ops->mult            = MatMult_SeqBAIJ_3;
2918       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2919       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2920       break;
2921     case 4:
2922       B->ops->mult            = MatMult_SeqBAIJ_4;
2923       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2924       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2925       break;
2926     case 5:
2927       B->ops->mult            = MatMult_SeqBAIJ_5;
2928       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2929       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2930       break;
2931     case 6:
2932       B->ops->mult            = MatMult_SeqBAIJ_6;
2933       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2934       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2935       break;
2936     case 7:
2937       B->ops->mult            = MatMult_SeqBAIJ_7;
2938       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2939       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2940       break;
2941     default:
2942       B->ops->mult            = MatMult_SeqBAIJ_N;
2943       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2944       break;
2945     }
2946   }
2947   B->rmap->bs      = bs;
2948   b->mbs     = mbs;
2949   b->nbs     = nbs;
2950   if (!skipallocation) {
2951     if (!b->imax) {
2952       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2953       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2954       b->free_imax_ilen = PETSC_TRUE;
2955     }
2956     /* b->ilen will count nonzeros in each block row so far. */
2957     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2958     if (!nnz) {
2959       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2960       else if (nz <= 0)        nz = 1;
2961       for (i=0; i<mbs; i++) b->imax[i] = nz;
2962       nz = nz*mbs;
2963     } else {
2964       nz = 0;
2965       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2966     }
2967 
2968     /* allocate the matrix space */
2969     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2970     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2971     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2972     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2973     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2974     b->singlemalloc = PETSC_TRUE;
2975     b->i[0] = 0;
2976     for (i=1; i<mbs+1; i++) {
2977       b->i[i] = b->i[i-1] + b->imax[i-1];
2978     }
2979     b->free_a     = PETSC_TRUE;
2980     b->free_ij    = PETSC_TRUE;
2981   } else {
2982     b->free_a     = PETSC_FALSE;
2983     b->free_ij    = PETSC_FALSE;
2984   }
2985 
2986   B->rmap->bs          = bs;
2987   b->bs2              = bs2;
2988   b->mbs              = mbs;
2989   b->nz               = 0;
2990   b->maxnz            = nz*bs2;
2991   B->info.nz_unneeded = (PetscReal)b->maxnz;
2992   PetscFunctionReturn(0);
2993 }
2994 EXTERN_C_END
2995 
2996 EXTERN_C_BEGIN
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2999 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3000 {
3001   PetscInt       i,m,nz,nz_max=0,*nnz;
3002   PetscScalar    *values=0;
3003   PetscErrorCode ierr;
3004 
3005   PetscFunctionBegin;
3006 
3007   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3008 
3009   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3010   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3011   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3012   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3013   m = B->rmap->n/bs;
3014 
3015   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3016   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3017   for(i=0; i<m; i++) {
3018     nz = ii[i+1]- ii[i];
3019     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3020     nz_max = PetscMax(nz_max, nz);
3021     nnz[i] = nz;
3022   }
3023   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3024   ierr = PetscFree(nnz);CHKERRQ(ierr);
3025 
3026   values = (PetscScalar*)V;
3027   if (!values) {
3028     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3029     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3030   }
3031   for (i=0; i<m; i++) {
3032     PetscInt          ncols  = ii[i+1] - ii[i];
3033     const PetscInt    *icols = jj + ii[i];
3034     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3035     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3036   }
3037   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3038   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040 
3041   PetscFunctionReturn(0);
3042 }
3043 EXTERN_C_END
3044 
3045 
3046 EXTERN_C_BEGIN
3047 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3048 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3049 EXTERN_C_END
3050 
3051 /*MC
3052    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3053    block sparse compressed row format.
3054 
3055    Options Database Keys:
3056 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3057 
3058   Level: beginner
3059 
3060 .seealso: MatCreateSeqBAIJ()
3061 M*/
3062 
3063 
3064 EXTERN_C_BEGIN
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatCreate_SeqBAIJ"
3067 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3068 {
3069   PetscErrorCode ierr;
3070   PetscMPIInt    size;
3071   Mat_SeqBAIJ    *b;
3072 
3073   PetscFunctionBegin;
3074   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3075   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3076 
3077   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3078   B->data = (void*)b;
3079   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3080   B->mapping               = 0;
3081   b->row                   = 0;
3082   b->col                   = 0;
3083   b->icol                  = 0;
3084   b->reallocs              = 0;
3085   b->saved_values          = 0;
3086 
3087   b->roworiented           = PETSC_TRUE;
3088   b->nonew                 = 0;
3089   b->diag                  = 0;
3090   b->solve_work            = 0;
3091   b->mult_work             = 0;
3092   B->spptr                 = 0;
3093   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3094   b->keepnonzeropattern    = PETSC_FALSE;
3095   b->xtoy                  = 0;
3096   b->XtoY                  = 0;
3097   b->compressedrow.use     = PETSC_FALSE;
3098   b->compressedrow.nrows   = 0;
3099   b->compressedrow.i       = PETSC_NULL;
3100   b->compressedrow.rindex  = PETSC_NULL;
3101   b->compressedrow.checked = PETSC_FALSE;
3102   B->same_nonzero          = PETSC_FALSE;
3103 
3104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
3105                                      "MatGetFactorAvailable_seqbaij_petsc",
3106                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
3108                                      "MatGetFactor_seqbaij_petsc",
3109                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3111                                      "MatInvertBlockDiagonal_SeqBAIJ",
3112                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3114                                      "MatStoreValues_SeqBAIJ",
3115                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3116   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3117                                      "MatRetrieveValues_SeqBAIJ",
3118                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3119   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3120                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3121                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3122   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3123                                      "MatConvert_SeqBAIJ_SeqAIJ",
3124                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3125   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3126                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3127                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3129                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3130                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3132                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3133                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3134   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 EXTERN_C_END
3138 
3139 #undef __FUNCT__
3140 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3141 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3142 {
3143   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3144   PetscErrorCode ierr;
3145   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3146 
3147   PetscFunctionBegin;
3148   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3149 
3150   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3151     c->imax = a->imax;
3152     c->ilen = a->ilen;
3153     c->free_imax_ilen = PETSC_FALSE;
3154   } else {
3155     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3156     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3157     for (i=0; i<mbs; i++) {
3158       c->imax[i] = a->imax[i];
3159       c->ilen[i] = a->ilen[i];
3160     }
3161     c->free_imax_ilen = PETSC_TRUE;
3162   }
3163 
3164   /* allocate the matrix space */
3165   if (mallocmatspace){
3166     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3167       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3168       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3169       c->singlemalloc = PETSC_FALSE;
3170       c->free_ij      = PETSC_FALSE;
3171       c->i            = a->i;
3172       c->j            = a->j;
3173       c->parent       = A;
3174       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3175       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3176       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3177     } else {
3178       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3179       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3180       c->singlemalloc = PETSC_TRUE;
3181       c->free_ij      = PETSC_TRUE;
3182       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3183       if (mbs > 0) {
3184 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3185 	if (cpvalues == MAT_COPY_VALUES) {
3186 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3187 	} else {
3188 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3189 	}
3190       }
3191     }
3192   }
3193 
3194   c->roworiented = a->roworiented;
3195   c->nonew       = a->nonew;
3196   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
3197   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
3198   c->bs2         = a->bs2;
3199   c->mbs         = a->mbs;
3200   c->nbs         = a->nbs;
3201 
3202   if (a->diag) {
3203     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3204       c->diag      = a->diag;
3205       c->free_diag = PETSC_FALSE;
3206     } else {
3207       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3208       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3209       for (i=0; i<mbs; i++) {
3210         c->diag[i] = a->diag[i];
3211       }
3212       c->free_diag = PETSC_TRUE;
3213     }
3214   } else c->diag        = 0;
3215   c->nz                 = a->nz;
3216   c->maxnz              = a->maxnz;
3217   c->solve_work         = 0;
3218   c->mult_work          = 0;
3219   c->free_a             = PETSC_TRUE;
3220   c->free_ij            = PETSC_TRUE;
3221   C->preallocated       = PETSC_TRUE;
3222   C->assembled          = PETSC_TRUE;
3223 
3224   c->compressedrow.use     = a->compressedrow.use;
3225   c->compressedrow.nrows   = a->compressedrow.nrows;
3226   c->compressedrow.checked = a->compressedrow.checked;
3227   if ( a->compressedrow.checked && a->compressedrow.use){
3228     i = a->compressedrow.nrows;
3229     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
3230     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3231     c->compressedrow.rindex = c->compressedrow.i + i + 1;
3232     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3233     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3234   } else {
3235     c->compressedrow.use    = PETSC_FALSE;
3236     c->compressedrow.i      = PETSC_NULL;
3237     c->compressedrow.rindex = PETSC_NULL;
3238   }
3239   C->same_nonzero = A->same_nonzero;
3240   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3241   PetscFunctionReturn(0);
3242 }
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3246 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3247 {
3248     PetscErrorCode ierr;
3249 
3250   PetscFunctionBegin;
3251   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3252   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3253   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3254   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 #undef __FUNCT__
3259 #define __FUNCT__ "MatLoad_SeqBAIJ"
3260 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3261 {
3262   Mat_SeqBAIJ    *a;
3263   Mat            B;
3264   PetscErrorCode ierr;
3265   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3266   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3267   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3268   PetscInt       *masked,nmask,tmp,bs2,ishift;
3269   PetscMPIInt    size;
3270   int            fd;
3271   PetscScalar    *aa;
3272   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3273 
3274   PetscFunctionBegin;
3275   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3276     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3277   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3278   bs2  = bs*bs;
3279 
3280   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3281   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3282   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3283   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3284   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3285   M = header[1]; N = header[2]; nz = header[3];
3286 
3287   if (header[3] < 0) {
3288     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3289   }
3290 
3291   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3292 
3293   /*
3294      This code adds extra rows to make sure the number of rows is
3295     divisible by the blocksize
3296   */
3297   mbs        = M/bs;
3298   extra_rows = bs - M + bs*(mbs);
3299   if (extra_rows == bs) extra_rows = 0;
3300   else                  mbs++;
3301   if (extra_rows) {
3302     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3303   }
3304 
3305   /* read in row lengths */
3306   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3307   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3308   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3309 
3310   /* read in column indices */
3311   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3312   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3313   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3314 
3315   /* loop over row lengths determining block row lengths */
3316   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3317   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3318   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3319   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3320   masked   = mask + mbs;
3321   rowcount = 0; nzcount = 0;
3322   for (i=0; i<mbs; i++) {
3323     nmask = 0;
3324     for (j=0; j<bs; j++) {
3325       kmax = rowlengths[rowcount];
3326       for (k=0; k<kmax; k++) {
3327         tmp = jj[nzcount++]/bs;
3328         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3329       }
3330       rowcount++;
3331     }
3332     browlengths[i] += nmask;
3333     /* zero out the mask elements we set */
3334     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3335   }
3336 
3337   /* create our matrix */
3338   ierr = MatCreate(comm,&B);
3339   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3340   ierr = MatSetType(B,type);CHKERRQ(ierr);
3341   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3342   a = (Mat_SeqBAIJ*)B->data;
3343 
3344   /* set matrix "i" values */
3345   a->i[0] = 0;
3346   for (i=1; i<= mbs; i++) {
3347     a->i[i]      = a->i[i-1] + browlengths[i-1];
3348     a->ilen[i-1] = browlengths[i-1];
3349   }
3350   a->nz         = 0;
3351   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3352 
3353   /* read in nonzero values */
3354   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3355   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3356   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3357 
3358   /* set "a" and "j" values into matrix */
3359   nzcount = 0; jcount = 0;
3360   for (i=0; i<mbs; i++) {
3361     nzcountb = nzcount;
3362     nmask    = 0;
3363     for (j=0; j<bs; j++) {
3364       kmax = rowlengths[i*bs+j];
3365       for (k=0; k<kmax; k++) {
3366         tmp = jj[nzcount++]/bs;
3367 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3368       }
3369     }
3370     /* sort the masked values */
3371     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3372 
3373     /* set "j" values into matrix */
3374     maskcount = 1;
3375     for (j=0; j<nmask; j++) {
3376       a->j[jcount++]  = masked[j];
3377       mask[masked[j]] = maskcount++;
3378     }
3379     /* set "a" values into matrix */
3380     ishift = bs2*a->i[i];
3381     for (j=0; j<bs; j++) {
3382       kmax = rowlengths[i*bs+j];
3383       for (k=0; k<kmax; k++) {
3384         tmp       = jj[nzcountb]/bs ;
3385         block     = mask[tmp] - 1;
3386         point     = jj[nzcountb] - bs*tmp;
3387         idx       = ishift + bs2*block + j + bs*point;
3388         a->a[idx] = (MatScalar)aa[nzcountb++];
3389       }
3390     }
3391     /* zero out the mask elements we set */
3392     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3393   }
3394   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3395 
3396   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3397   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3398   ierr = PetscFree(aa);CHKERRQ(ierr);
3399   ierr = PetscFree(jj);CHKERRQ(ierr);
3400   ierr = PetscFree(mask);CHKERRQ(ierr);
3401 
3402   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3403   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3404   ierr = MatView_Private(B);CHKERRQ(ierr);
3405 
3406   *A = B;
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatCreateSeqBAIJ"
3412 /*@C
3413    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3414    compressed row) format.  For good matrix assembly performance the
3415    user should preallocate the matrix storage by setting the parameter nz
3416    (or the array nnz).  By setting these parameters accurately, performance
3417    during matrix assembly can be increased by more than a factor of 50.
3418 
3419    Collective on MPI_Comm
3420 
3421    Input Parameters:
3422 +  comm - MPI communicator, set to PETSC_COMM_SELF
3423 .  bs - size of block
3424 .  m - number of rows
3425 .  n - number of columns
3426 .  nz - number of nonzero blocks  per block row (same for all rows)
3427 -  nnz - array containing the number of nonzero blocks in the various block rows
3428          (possibly different for each block row) or PETSC_NULL
3429 
3430    Output Parameter:
3431 .  A - the matrix
3432 
3433    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3434    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3435    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3436 
3437    Options Database Keys:
3438 .   -mat_no_unroll - uses code that does not unroll the loops in the
3439                      block calculations (much slower)
3440 .    -mat_block_size - size of the blocks to use
3441 
3442    Level: intermediate
3443 
3444    Notes:
3445    The number of rows and columns must be divisible by blocksize.
3446 
3447    If the nnz parameter is given then the nz parameter is ignored
3448 
3449    A nonzero block is any block that as 1 or more nonzeros in it
3450 
3451    The block AIJ format is fully compatible with standard Fortran 77
3452    storage.  That is, the stored row and column indices can begin at
3453    either one (as in Fortran) or zero.  See the users' manual for details.
3454 
3455    Specify the preallocated storage with either nz or nnz (not both).
3456    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3457    allocation.  For additional details, see the users manual chapter on
3458    matrices.
3459 
3460 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3461 @*/
3462 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3463 {
3464   PetscErrorCode ierr;
3465 
3466   PetscFunctionBegin;
3467   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3468   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3469   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3470   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3471   PetscFunctionReturn(0);
3472 }
3473 
3474 #undef __FUNCT__
3475 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3476 /*@C
3477    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3478    per row in the matrix. For good matrix assembly performance the
3479    user should preallocate the matrix storage by setting the parameter nz
3480    (or the array nnz).  By setting these parameters accurately, performance
3481    during matrix assembly can be increased by more than a factor of 50.
3482 
3483    Collective on MPI_Comm
3484 
3485    Input Parameters:
3486 +  A - the matrix
3487 .  bs - size of block
3488 .  nz - number of block nonzeros per block row (same for all rows)
3489 -  nnz - array containing the number of block nonzeros in the various block rows
3490          (possibly different for each block row) or PETSC_NULL
3491 
3492    Options Database Keys:
3493 .   -mat_no_unroll - uses code that does not unroll the loops in the
3494                      block calculations (much slower)
3495 .    -mat_block_size - size of the blocks to use
3496 
3497    Level: intermediate
3498 
3499    Notes:
3500    If the nnz parameter is given then the nz parameter is ignored
3501 
3502    You can call MatGetInfo() to get information on how effective the preallocation was;
3503    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3504    You can also run with the option -info and look for messages with the string
3505    malloc in them to see if additional memory allocation was needed.
3506 
3507    The block AIJ format is fully compatible with standard Fortran 77
3508    storage.  That is, the stored row and column indices can begin at
3509    either one (as in Fortran) or zero.  See the users' manual for details.
3510 
3511    Specify the preallocated storage with either nz or nnz (not both).
3512    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3513    allocation.  For additional details, see the users manual chapter on
3514    matrices.
3515 
3516 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3517 @*/
3518 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3519 {
3520   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3521 
3522   PetscFunctionBegin;
3523   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3524   if (f) {
3525     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3526   }
3527   PetscFunctionReturn(0);
3528 }
3529 
3530 #undef __FUNCT__
3531 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3532 /*@C
3533    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3534    (the default sequential PETSc format).
3535 
3536    Collective on MPI_Comm
3537 
3538    Input Parameters:
3539 +  A - the matrix
3540 .  i - the indices into j for the start of each local row (starts with zero)
3541 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3542 -  v - optional values in the matrix
3543 
3544    Level: developer
3545 
3546 .keywords: matrix, aij, compressed row, sparse
3547 
3548 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3549 @*/
3550 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3551 {
3552   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3553 
3554   PetscFunctionBegin;
3555   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3556   if (f) {
3557     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3558   }
3559   PetscFunctionReturn(0);
3560 }
3561 
3562 
3563 #undef __FUNCT__
3564 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3565 /*@
3566      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3567               (upper triangular entries in CSR format) provided by the user.
3568 
3569      Collective on MPI_Comm
3570 
3571    Input Parameters:
3572 +  comm - must be an MPI communicator of size 1
3573 .  bs - size of block
3574 .  m - number of rows
3575 .  n - number of columns
3576 .  i - row indices
3577 .  j - column indices
3578 -  a - matrix values
3579 
3580    Output Parameter:
3581 .  mat - the matrix
3582 
3583    Level: intermediate
3584 
3585    Notes:
3586        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3587     once the matrix is destroyed
3588 
3589        You cannot set new nonzero locations into this matrix, that will generate an error.
3590 
3591        The i and j indices are 0 based
3592 
3593 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3594 
3595 @*/
3596 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3597 {
3598   PetscErrorCode ierr;
3599   PetscInt       ii;
3600   Mat_SeqBAIJ    *baij;
3601 
3602   PetscFunctionBegin;
3603   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3604   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3605 
3606   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3607   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3608   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3609   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3610   baij = (Mat_SeqBAIJ*)(*mat)->data;
3611   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3612   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3613 
3614   baij->i = i;
3615   baij->j = j;
3616   baij->a = a;
3617   baij->singlemalloc = PETSC_FALSE;
3618   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3619   baij->free_a       = PETSC_FALSE;
3620   baij->free_ij       = PETSC_FALSE;
3621 
3622   for (ii=0; ii<m; ii++) {
3623     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3624 #if defined(PETSC_USE_DEBUG)
3625     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3626 #endif
3627   }
3628 #if defined(PETSC_USE_DEBUG)
3629   for (ii=0; ii<baij->i[m]; ii++) {
3630     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3631     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3632   }
3633 #endif
3634 
3635   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637   PetscFunctionReturn(0);
3638 }
3639