xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 97e5c40a63d3f963eb379f22da5ce69dc304f982)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   MatScalar     *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
118 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,s1;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0];
146       i2     = 1;
147       idiag += 1;
148       for (i=1; i<m; i++) {
149         v     = aa + ai[i];
150         vi    = aj + ai[i];
151         nz    = diag[i] - ai[i];
152         s1    = b[i2];
153         while (nz--) {
154           idx  = (*vi++);
155           x1   = x[idx];
156           s1  -= v[0]*x1;
157           v   += 1;
158         }
159         x[i2]   = idiag[0]*s1;
160         idiag   += 1;
161         i2      += 1;
162       }
163       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
164       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
165     }
166     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
167         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
168       i2    = 0;
169       mdiag = a->idiag+a->mbs;
170       for (i=0; i<m; i++) {
171         x1      = x[i2];
172         x[i2]   = mdiag[0]*x1;
173         mdiag  += 1;
174         i2     += 1;
175       }
176       ierr = PetscLogFlops(m);CHKERRQ(ierr);
177     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
179     }
180     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
181       idiag   = a->idiag+a->mbs - 1;
182       i2      = m - 1;
183       x1      = x[i2];
184       x[i2]   = idiag[0]*x1;
185       idiag -= 1;
186       i2    -= 1;
187       for (i=m-2; i>=0; i--) {
188         v     = aa + (diag[i]+1);
189         vi    = aj + diag[i] + 1;
190         nz    = ai[i+1] - diag[i] - 1;
191         s1    = x[i2];
192         while (nz--) {
193           idx  = (*vi++);
194           x1   = x[idx];
195           s1  -= v[0]*x1;
196           v   += 1;
197         }
198         x[i2]   = idiag[0]*s1;
199         idiag   -= 2;
200         i2      -= 1;
201       }
202       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
203     }
204   } else {
205     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
206   }
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
214 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215 {
216   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
217   PetscScalar        *x,x1,x2,s1,s2;
218   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
219   PetscErrorCode     ierr;
220   PetscInt           m = a->mbs,i,i2,nz,idx;
221   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
222 
223   PetscFunctionBegin;
224   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
225   its = its*lits;
226   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
227   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
228   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
229   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
230   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
231 
232   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
233 
234   diag  = a->diag;
235   idiag = a->idiag;
236   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
237   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
241       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
242       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
243       i2     = 2;
244       idiag += 4;
245       for (i=1; i<m; i++) {
246 	v     = aa + 4*ai[i];
247 	vi    = aj + ai[i];
248 	nz    = diag[i] - ai[i];
249 	s1    = b[i2]; s2 = b[i2+1];
250 	while (nz--) {
251 	  idx  = 2*(*vi++);
252 	  x1   = x[idx]; x2 = x[1+idx];
253 	  s1  -= v[0]*x1 + v[2]*x2;
254 	  s2  -= v[1]*x1 + v[3]*x2;
255 	  v   += 4;
256 	}
257 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
258 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
259         idiag   += 4;
260         i2      += 2;
261       }
262       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
263       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
264     }
265     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
266         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
267       i2    = 0;
268       mdiag = a->idiag+4*a->mbs;
269       for (i=0; i<m; i++) {
270         x1      = x[i2]; x2 = x[i2+1];
271         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
272         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
273         mdiag  += 4;
274         i2     += 2;
275       }
276       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
277     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
278       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
279     }
280     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
281       idiag   = a->idiag+4*a->mbs - 4;
282       i2      = 2*m - 2;
283       x1      = x[i2]; x2 = x[i2+1];
284       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
285       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
286       idiag -= 4;
287       i2    -= 2;
288       for (i=m-2; i>=0; i--) {
289 	v     = aa + 4*(diag[i]+1);
290 	vi    = aj + diag[i] + 1;
291 	nz    = ai[i+1] - diag[i] - 1;
292 	s1    = x[i2]; s2 = x[i2+1];
293 	while (nz--) {
294 	  idx  = 2*(*vi++);
295 	  x1   = x[idx]; x2 = x[1+idx];
296 	  s1  -= v[0]*x1 + v[2]*x2;
297 	  s2  -= v[1]*x1 + v[3]*x2;
298 	  v   += 4;
299 	}
300 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
301 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
302         idiag   -= 4;
303         i2      -= 2;
304       }
305       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
306     }
307   } else {
308     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309   }
310   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
311   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
317 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
318 {
319   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
320   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
321   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
322   PetscErrorCode     ierr;
323   PetscInt           m = a->mbs,i,i2,nz,idx;
324   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
325 
326   PetscFunctionBegin;
327   its = its*lits;
328   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
329   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
334 
335   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
336 
337   diag  = a->diag;
338   idiag = a->idiag;
339   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
341 
342   if (flag & SOR_ZERO_INITIAL_GUESS) {
343     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
344       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347       i2     = 3;
348       idiag += 9;
349       for (i=1; i<m; i++) {
350         v     = aa + 9*ai[i];
351         vi    = aj + ai[i];
352         nz    = diag[i] - ai[i];
353         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354         while (nz--) {
355           idx  = 3*(*vi++);
356           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360           v   += 9;
361         }
362         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365         idiag   += 9;
366         i2      += 3;
367       }
368       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
370     }
371     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373       i2    = 0;
374       mdiag = a->idiag+9*a->mbs;
375       for (i=0; i<m; i++) {
376         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380         mdiag  += 9;
381         i2     += 3;
382       }
383       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
384     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
386     }
387     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
388       idiag   = a->idiag+9*a->mbs - 9;
389       i2      = 3*m - 3;
390       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394       idiag -= 9;
395       i2    -= 3;
396       for (i=m-2; i>=0; i--) {
397         v     = aa + 9*(diag[i]+1);
398         vi    = aj + diag[i] + 1;
399         nz    = ai[i+1] - diag[i] - 1;
400         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401         while (nz--) {
402           idx  = 3*(*vi++);
403           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407           v   += 9;
408         }
409         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412         idiag   -= 9;
413         i2      -= 3;
414       }
415       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
416     }
417   } else {
418     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
419   }
420   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
421   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
427 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
428 {
429   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
430   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
431   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
432   PetscErrorCode     ierr;
433   PetscInt           m = a->mbs,i,i2,nz,idx;
434   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
435 
436   PetscFunctionBegin;
437   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
438   its = its*lits;
439   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
440   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
441   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
442   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
443   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
444 
445   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
446 
447   diag  = a->diag;
448   idiag = a->idiag;
449   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
451 
452   if (flag & SOR_ZERO_INITIAL_GUESS) {
453     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
454       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
455       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
456       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
457       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
458       i2     = 4;
459       idiag += 16;
460       for (i=1; i<m; i++) {
461 	v     = aa + 16*ai[i];
462 	vi    = aj + ai[i];
463 	nz    = diag[i] - ai[i];
464 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
465 	while (nz--) {
466 	  idx  = 4*(*vi++);
467 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
468 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
469 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
470 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
471 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
472 	  v   += 16;
473 	}
474 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
475 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
476 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
477 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
478         idiag   += 16;
479         i2      += 4;
480       }
481       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
482       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
483     }
484     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
485         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
486       i2    = 0;
487       mdiag = a->idiag+16*a->mbs;
488       for (i=0; i<m; i++) {
489         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
490         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
491         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
492         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
493         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
494         mdiag  += 16;
495         i2     += 4;
496       }
497       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
498     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
499       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
500     }
501     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
502       idiag   = a->idiag+16*a->mbs - 16;
503       i2      = 4*m - 4;
504       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
505       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
506       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
507       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
508       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
509       idiag -= 16;
510       i2    -= 4;
511       for (i=m-2; i>=0; i--) {
512 	v     = aa + 16*(diag[i]+1);
513 	vi    = aj + diag[i] + 1;
514 	nz    = ai[i+1] - diag[i] - 1;
515 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
516 	while (nz--) {
517 	  idx  = 4*(*vi++);
518 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
519 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
520 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
521 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
522 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
523 	  v   += 16;
524 	}
525 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
526 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
527 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
528 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
529         idiag   -= 16;
530         i2      -= 4;
531       }
532       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
533     }
534   } else {
535     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
536   }
537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
538   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
544 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
545 {
546   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
547   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
548   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
549   PetscErrorCode     ierr;
550   PetscInt           m = a->mbs,i,i2,nz,idx;
551   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
552 
553   PetscFunctionBegin;
554   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
555   its = its*lits;
556   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
557   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
558   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
559   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
560   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
561 
562   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
563 
564   diag  = a->diag;
565   idiag = a->idiag;
566   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
567   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
568 
569   if (flag & SOR_ZERO_INITIAL_GUESS) {
570     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
571       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
572       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
573       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
574       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
575       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
576       i2     = 5;
577       idiag += 25;
578       for (i=1; i<m; i++) {
579 	v     = aa + 25*ai[i];
580 	vi    = aj + ai[i];
581 	nz    = diag[i] - ai[i];
582 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
583 	while (nz--) {
584 	  idx  = 5*(*vi++);
585 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
586 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
587 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
588 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
589 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
590 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
591 	  v   += 25;
592 	}
593 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
594 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
595 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
596 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
597 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
598         idiag   += 25;
599         i2      += 5;
600       }
601       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
602       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
603     }
604     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
605         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
606       i2    = 0;
607       mdiag = a->idiag+25*a->mbs;
608       for (i=0; i<m; i++) {
609         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
610         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
611         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
612         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
613         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
614         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
615         mdiag  += 25;
616         i2     += 5;
617       }
618       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
619     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
620       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
621     }
622     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
623       idiag   = a->idiag+25*a->mbs - 25;
624       i2      = 5*m - 5;
625       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
626       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
627       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
628       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
629       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
630       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
631       idiag -= 25;
632       i2    -= 5;
633       for (i=m-2; i>=0; i--) {
634 	v     = aa + 25*(diag[i]+1);
635 	vi    = aj + diag[i] + 1;
636 	nz    = ai[i+1] - diag[i] - 1;
637 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
638 	while (nz--) {
639 	  idx  = 5*(*vi++);
640 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
641 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
642 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
643 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
644 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
645 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
646 	  v   += 25;
647 	}
648 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
649 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
650 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
651 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
652 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
653         idiag   -= 25;
654         i2      -= 5;
655       }
656       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
657     }
658   } else {
659     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
660   }
661   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
662   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
668 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
669 {
670   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
671   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
672   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
673   PetscErrorCode     ierr;
674   PetscInt           m = a->mbs,i,i2,nz,idx;
675   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
676 
677   PetscFunctionBegin;
678   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
679   its = its*lits;
680   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
681   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
682   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
683   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
684   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
685 
686   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
687 
688   diag  = a->diag;
689   idiag = a->idiag;
690   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
691   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
692 
693   if (flag & SOR_ZERO_INITIAL_GUESS) {
694     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
695       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
696       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
697       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
698       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
699       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
700       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
701       i2     = 6;
702       idiag += 36;
703       for (i=1; i<m; i++) {
704         v     = aa + 36*ai[i];
705         vi    = aj + ai[i];
706         nz    = diag[i] - ai[i];
707         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
708         while (nz--) {
709           idx  = 6*(*vi++);
710           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
711           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
712           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
713           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
714           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
715           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
716           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
717           v   += 36;
718         }
719         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
720         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
721         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
722         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
723         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
724         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
725         idiag   += 36;
726         i2      += 6;
727       }
728       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
729       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
730     }
731     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
732         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
733       i2    = 0;
734       mdiag = a->idiag+36*a->mbs;
735       for (i=0; i<m; i++) {
736         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
737         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
738         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
739         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
740         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
741         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
742         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
743         mdiag  += 36;
744         i2     += 6;
745       }
746       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
747     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
749     }
750     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
751       idiag   = a->idiag+36*a->mbs - 36;
752       i2      = 6*m - 6;
753       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
754       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
755       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
756       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
757       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
758       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
759       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
760       idiag -= 36;
761       i2    -= 6;
762       for (i=m-2; i>=0; i--) {
763         v     = aa + 36*(diag[i]+1);
764         vi    = aj + diag[i] + 1;
765         nz    = ai[i+1] - diag[i] - 1;
766         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
767         while (nz--) {
768           idx  = 6*(*vi++);
769           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
770           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
771           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
772           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
773           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
774           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
775           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
776           v   += 36;
777         }
778         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
779         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
780         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
781         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
782         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
783         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
784         idiag   -= 36;
785         i2      -= 6;
786       }
787       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
788     }
789   } else {
790     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
791   }
792   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
793   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
799 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
800 {
801   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
802   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
803   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
804   PetscErrorCode     ierr;
805   PetscInt           m = a->mbs,i,i2,nz,idx;
806   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
807 
808   PetscFunctionBegin;
809   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
810   its = its*lits;
811   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
812   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
813   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
814   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
815   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
816 
817   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
818 
819   diag  = a->diag;
820   idiag = a->idiag;
821   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
822   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
823 
824   if (flag & SOR_ZERO_INITIAL_GUESS) {
825     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
826       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
827       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
828       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
829       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
830       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
831       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
832       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
833       i2     = 7;
834       idiag += 49;
835       for (i=1; i<m; i++) {
836         v     = aa + 49*ai[i];
837         vi    = aj + ai[i];
838         nz    = diag[i] - ai[i];
839         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
840         while (nz--) {
841           idx  = 7*(*vi++);
842           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
843           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
844           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
845           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
846           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
847           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
848           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
849           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
850           v   += 49;
851         }
852         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
853         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
854         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
855         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
856         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
857         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
858         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
859         idiag   += 49;
860         i2      += 7;
861       }
862       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
863       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
864     }
865     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
866         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
867       i2    = 0;
868       mdiag = a->idiag+49*a->mbs;
869       for (i=0; i<m; i++) {
870         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
871         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
872         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
873         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
874         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
875         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
876         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
877         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
878         mdiag  += 36;
879         i2     += 6;
880       }
881       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
882     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
883       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
884     }
885     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
886       idiag   = a->idiag+49*a->mbs - 49;
887       i2      = 7*m - 7;
888       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
889       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
890       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
891       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
892       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
893       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
894       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
895       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
896       idiag -= 49;
897       i2    -= 7;
898       for (i=m-2; i>=0; i--) {
899         v     = aa + 49*(diag[i]+1);
900         vi    = aj + diag[i] + 1;
901         nz    = ai[i+1] - diag[i] - 1;
902         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
903         while (nz--) {
904           idx  = 7*(*vi++);
905           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
906           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
907           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
908           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
909           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
910           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
911           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
912           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
913           v   += 49;
914         }
915         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
916         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
917         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
918         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
919         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
920         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
921         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
922         idiag   -= 49;
923         i2      -= 7;
924       }
925       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
926     }
927   } else {
928     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
929   }
930   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
931   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
932   PetscFunctionReturn(0);
933 }
934 
935 /*
936     Special version for direct calls from Fortran (Used in PETSc-fun3d)
937 */
938 #if defined(PETSC_HAVE_FORTRAN_CAPS)
939 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
940 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
941 #define matsetvaluesblocked4_ matsetvaluesblocked4
942 #endif
943 
944 EXTERN_C_BEGIN
945 #undef __FUNCT__
946 #define __FUNCT__ "matsetvaluesblocked4_"
947 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
948 {
949   Mat               A = *AA;
950   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
951   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
952   PetscInt          *ai=a->i,*ailen=a->ilen;
953   PetscInt          *aj=a->j,stepval,lastcol = -1;
954   const PetscScalar *value = v;
955   MatScalar         *ap,*aa = a->a,*bap;
956 
957   PetscFunctionBegin;
958   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
959   stepval = (n-1)*4;
960   for (k=0; k<m; k++) { /* loop over added rows */
961     row  = im[k];
962     rp   = aj + ai[row];
963     ap   = aa + 16*ai[row];
964     nrow = ailen[row];
965     low  = 0;
966     high = nrow;
967     for (l=0; l<n; l++) { /* loop over added columns */
968       col = in[l];
969       if (col <= lastcol) low = 0; else high = nrow;
970       lastcol = col;
971       value = v + k*(stepval+4 + l)*4;
972       while (high-low > 7) {
973         t = (low+high)/2;
974         if (rp[t] > col) high = t;
975         else             low  = t;
976       }
977       for (i=low; i<high; i++) {
978         if (rp[i] > col) break;
979         if (rp[i] == col) {
980           bap  = ap +  16*i;
981           for (ii=0; ii<4; ii++,value+=stepval) {
982             for (jj=ii; jj<16; jj+=4) {
983               bap[jj] += *value++;
984             }
985           }
986           goto noinsert2;
987         }
988       }
989       N = nrow++ - 1;
990       high++; /* added new column index thus must search to one higher than before */
991       /* shift up all the later entries in this row */
992       for (ii=N; ii>=i; ii--) {
993         rp[ii+1] = rp[ii];
994         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
995       }
996       if (N >= i) {
997         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
998       }
999       rp[i] = col;
1000       bap   = ap +  16*i;
1001       for (ii=0; ii<4; ii++,value+=stepval) {
1002         for (jj=ii; jj<16; jj+=4) {
1003           bap[jj] = *value++;
1004         }
1005       }
1006       noinsert2:;
1007       low = i;
1008     }
1009     ailen[row] = nrow;
1010   }
1011   PetscFunctionReturnVoid();
1012 }
1013 EXTERN_C_END
1014 
1015 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1016 #define matsetvalues4_ MATSETVALUES4
1017 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1018 #define matsetvalues4_ matsetvalues4
1019 #endif
1020 
1021 EXTERN_C_BEGIN
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatSetValues4_"
1024 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1025 {
1026   Mat         A = *AA;
1027   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1028   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1029   PetscInt    *ai=a->i,*ailen=a->ilen;
1030   PetscInt    *aj=a->j,brow,bcol;
1031   PetscInt    ridx,cidx,lastcol = -1;
1032   MatScalar   *ap,value,*aa=a->a,*bap;
1033 
1034   PetscFunctionBegin;
1035   for (k=0; k<m; k++) { /* loop over added rows */
1036     row  = im[k]; brow = row/4;
1037     rp   = aj + ai[brow];
1038     ap   = aa + 16*ai[brow];
1039     nrow = ailen[brow];
1040     low  = 0;
1041     high = nrow;
1042     for (l=0; l<n; l++) { /* loop over added columns */
1043       col = in[l]; bcol = col/4;
1044       ridx = row % 4; cidx = col % 4;
1045       value = v[l + k*n];
1046       if (col <= lastcol) low = 0; else high = nrow;
1047       lastcol = col;
1048       while (high-low > 7) {
1049         t = (low+high)/2;
1050         if (rp[t] > bcol) high = t;
1051         else              low  = t;
1052       }
1053       for (i=low; i<high; i++) {
1054         if (rp[i] > bcol) break;
1055         if (rp[i] == bcol) {
1056           bap  = ap +  16*i + 4*cidx + ridx;
1057           *bap += value;
1058           goto noinsert1;
1059         }
1060       }
1061       N = nrow++ - 1;
1062       high++; /* added new column thus must search to one higher than before */
1063       /* shift up all the later entries in this row */
1064       for (ii=N; ii>=i; ii--) {
1065         rp[ii+1] = rp[ii];
1066         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1067       }
1068       if (N>=i) {
1069         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1070       }
1071       rp[i]                    = bcol;
1072       ap[16*i + 4*cidx + ridx] = value;
1073       noinsert1:;
1074       low = i;
1075     }
1076     ailen[brow] = nrow;
1077   }
1078   PetscFunctionReturnVoid();
1079 }
1080 EXTERN_C_END
1081 
1082 /*
1083      Checks for missing diagonals
1084 */
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1087 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1088 {
1089   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1090   PetscErrorCode ierr;
1091   PetscInt       *diag,*jj = a->j,i;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1095   *missing = PETSC_FALSE;
1096   diag     = a->diag;
1097   for (i=0; i<a->mbs; i++) {
1098     if (jj[diag[i]] != i) {
1099       *missing  = PETSC_TRUE;
1100       if (d) *d = i;
1101     }
1102   }
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1108 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1109 {
1110   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1111   PetscErrorCode ierr;
1112   PetscInt       i,j,m = a->mbs;
1113 
1114   PetscFunctionBegin;
1115   if (!a->diag) {
1116     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1117   }
1118   for (i=0; i<m; i++) {
1119     a->diag[i] = a->i[i+1];
1120     for (j=a->i[i]; j<a->i[i+1]; j++) {
1121       if (a->j[j] == i) {
1122         a->diag[i] = j;
1123         break;
1124       }
1125     }
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 
1131 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1135 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1136 {
1137   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1138   PetscErrorCode ierr;
1139   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1140   PetscInt       *tia, *tja;
1141 
1142   PetscFunctionBegin;
1143   *nn = n;
1144   if (!ia) PetscFunctionReturn(0);
1145   if (symmetric) {
1146     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1147   } else {
1148     tia = a->i; tja = a->j;
1149   }
1150 
1151   if (!blockcompressed && bs > 1) {
1152     (*nn) *= bs;
1153     nbs    = bs;
1154     /* malloc & create the natural set of indices */
1155     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1156     if (n) {
1157       (*ia)[0] = 0;
1158       for (j=1; j<bs; j++) {
1159         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1160       }
1161     }
1162 
1163     for (i=1; i<n; i++) {
1164       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1165       for (j=1; j<bs; j++) {
1166         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1167       }
1168     }
1169     if (n) {
1170       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1171     }
1172 
1173     if (ja) {
1174       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1175       cnt = 0;
1176       for (i=0; i<n; i++) {
1177         for (j=0; j<bs; j++) {
1178           for (k=tia[i]; k<tia[i+1]; k++) {
1179             for (l=0; l<bs; l++) {
1180               (*ja)[cnt++] = bs*tja[k] + l;
1181 	    }
1182           }
1183         }
1184       }
1185     }
1186 
1187     n     *= bs;
1188     nz *= bs*bs;
1189     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1190       ierr = PetscFree(tia);CHKERRQ(ierr);
1191       ierr = PetscFree(tja);CHKERRQ(ierr);
1192     }
1193   } else {
1194     *ia = tia;
1195     if (ja) *ja = tja;
1196   }
1197   if (oshift == 1) {
1198     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1199     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1200   }
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1206 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1207 {
1208   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1209   PetscErrorCode ierr;
1210   PetscInt       i,n = a->mbs,nz = a->i[n];
1211 
1212   PetscFunctionBegin;
1213   if (!ia) PetscFunctionReturn(0);
1214   if (!blockcompressed && A->rmap.bs > 1) {
1215     ierr = PetscFree(*ia);CHKERRQ(ierr);
1216     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1217   } else if (symmetric) {
1218     ierr = PetscFree(*ia);CHKERRQ(ierr);
1219     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1220   } else if (oshift == 1) { /* blockcompressed */
1221     for (i=0; i<n+1; i++) a->i[i]--;
1222     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1223   }
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1229 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1230 {
1231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235 #if defined(PETSC_USE_LOG)
1236   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1237 #endif
1238   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1239   if (a->row) {
1240     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1241   }
1242   if (a->col) {
1243     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1244   }
1245   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1246   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1247   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1248   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1249   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1250   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1251   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1252   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1253   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1254 
1255   ierr = PetscFree(a);CHKERRQ(ierr);
1256 
1257   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1258   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1259   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1260   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1261   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1262   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1263   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1264   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1270 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1271 {
1272   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276   switch (op) {
1277   case MAT_ROW_ORIENTED:
1278     a->roworiented    = flg;
1279     break;
1280   case MAT_KEEP_ZEROED_ROWS:
1281     a->keepzeroedrows = flg;
1282     break;
1283   case MAT_NEW_NONZERO_LOCATIONS:
1284     a->nonew          = (flg ? 0 : 1);
1285     break;
1286   case MAT_NEW_NONZERO_LOCATION_ERR:
1287     a->nonew          = (flg ? -1 : 0);
1288     break;
1289   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1290     a->nonew          = (flg ? -2 : 0);
1291     break;
1292   case MAT_NEW_DIAGONALS:
1293   case MAT_IGNORE_OFF_PROC_ENTRIES:
1294   case MAT_USE_HASH_TABLE:
1295     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1296     break;
1297   case MAT_SYMMETRIC:
1298   case MAT_STRUCTURALLY_SYMMETRIC:
1299   case MAT_HERMITIAN:
1300   case MAT_SYMMETRY_ETERNAL:
1301     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1302     break;
1303   default:
1304     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1311 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1312 {
1313   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1316   MatScalar      *aa,*aa_i;
1317   PetscScalar    *v_i;
1318 
1319   PetscFunctionBegin;
1320   bs  = A->rmap.bs;
1321   ai  = a->i;
1322   aj  = a->j;
1323   aa  = a->a;
1324   bs2 = a->bs2;
1325 
1326   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1327 
1328   bn  = row/bs;   /* Block number */
1329   bp  = row % bs; /* Block Position */
1330   M   = ai[bn+1] - ai[bn];
1331   *nz = bs*M;
1332 
1333   if (v) {
1334     *v = 0;
1335     if (*nz) {
1336       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1337       for (i=0; i<M; i++) { /* for each block in the block row */
1338         v_i  = *v + i*bs;
1339         aa_i = aa + bs2*(ai[bn] + i);
1340         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1341       }
1342     }
1343   }
1344 
1345   if (idx) {
1346     *idx = 0;
1347     if (*nz) {
1348       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1349       for (i=0; i<M; i++) { /* for each block in the block row */
1350         idx_i = *idx + i*bs;
1351         itmp  = bs*aj[ai[bn] + i];
1352         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1353       }
1354     }
1355   }
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1361 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1362 {
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1367   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1373 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1374 {
1375   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1376   Mat            C;
1377   PetscErrorCode ierr;
1378   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1379   PetscInt       *rows,*cols,bs2=a->bs2;
1380   PetscScalar    *array;
1381 
1382   PetscFunctionBegin;
1383   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1384   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1385   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1386 
1387   array = a->a;
1388   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1389   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1390   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1391   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1392   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1393   ierr = PetscFree(col);CHKERRQ(ierr);
1394   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1395   cols = rows + bs;
1396   for (i=0; i<mbs; i++) {
1397     cols[0] = i*bs;
1398     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1399     len = ai[i+1] - ai[i];
1400     for (j=0; j<len; j++) {
1401       rows[0] = (*aj++)*bs;
1402       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1403       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1404       array += bs2;
1405     }
1406   }
1407   ierr = PetscFree(rows);CHKERRQ(ierr);
1408 
1409   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411 
1412   if (B) {
1413     *B = C;
1414   } else {
1415     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1416   }
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1422 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1423 {
1424   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1427   int            fd;
1428   PetscScalar    *aa;
1429   FILE           *file;
1430 
1431   PetscFunctionBegin;
1432   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1433   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1434   col_lens[0] = MAT_FILE_COOKIE;
1435 
1436   col_lens[1] = A->rmap.N;
1437   col_lens[2] = A->cmap.n;
1438   col_lens[3] = a->nz*bs2;
1439 
1440   /* store lengths of each row and write (including header) to file */
1441   count = 0;
1442   for (i=0; i<a->mbs; i++) {
1443     for (j=0; j<bs; j++) {
1444       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1445     }
1446   }
1447   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1448   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1449 
1450   /* store column indices (zero start index) */
1451   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1452   count = 0;
1453   for (i=0; i<a->mbs; i++) {
1454     for (j=0; j<bs; j++) {
1455       for (k=a->i[i]; k<a->i[i+1]; k++) {
1456         for (l=0; l<bs; l++) {
1457           jj[count++] = bs*a->j[k] + l;
1458         }
1459       }
1460     }
1461   }
1462   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1463   ierr = PetscFree(jj);CHKERRQ(ierr);
1464 
1465   /* store nonzero values */
1466   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1467   count = 0;
1468   for (i=0; i<a->mbs; i++) {
1469     for (j=0; j<bs; j++) {
1470       for (k=a->i[i]; k<a->i[i+1]; k++) {
1471         for (l=0; l<bs; l++) {
1472           aa[count++] = a->a[bs2*k + l*bs + j];
1473         }
1474       }
1475     }
1476   }
1477   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1478   ierr = PetscFree(aa);CHKERRQ(ierr);
1479 
1480   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1481   if (file) {
1482     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1483   }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1489 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1490 {
1491   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1492   PetscErrorCode    ierr;
1493   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1494   PetscViewerFormat format;
1495 
1496   PetscFunctionBegin;
1497   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1498   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1499     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1500   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1501     Mat aij;
1502     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1503     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1504     ierr = MatDestroy(aij);CHKERRQ(ierr);
1505   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1506      PetscFunctionReturn(0);
1507   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1508     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1509     for (i=0; i<a->mbs; i++) {
1510       for (j=0; j<bs; j++) {
1511         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1512         for (k=a->i[i]; k<a->i[i+1]; k++) {
1513           for (l=0; l<bs; l++) {
1514 #if defined(PETSC_USE_COMPLEX)
1515             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1516               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1517                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1518             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1519               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1520                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1521             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1522               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1523             }
1524 #else
1525             if (a->a[bs2*k + l*bs + j] != 0.0) {
1526               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1527             }
1528 #endif
1529           }
1530         }
1531         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1532       }
1533     }
1534     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1535   } else {
1536     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1537     for (i=0; i<a->mbs; i++) {
1538       for (j=0; j<bs; j++) {
1539         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1540         for (k=a->i[i]; k<a->i[i+1]; k++) {
1541           for (l=0; l<bs; l++) {
1542 #if defined(PETSC_USE_COMPLEX)
1543             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1545                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1546             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1547               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1548                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1549             } else {
1550               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1551             }
1552 #else
1553             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1554 #endif
1555           }
1556         }
1557         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1558       }
1559     }
1560     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1561   }
1562   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1568 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1569 {
1570   Mat            A = (Mat) Aa;
1571   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1572   PetscErrorCode ierr;
1573   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1574   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1575   MatScalar      *aa;
1576   PetscViewer    viewer;
1577 
1578   PetscFunctionBegin;
1579 
1580   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1581   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1582 
1583   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1584 
1585   /* loop over matrix elements drawing boxes */
1586   color = PETSC_DRAW_BLUE;
1587   for (i=0,row=0; i<mbs; i++,row+=bs) {
1588     for (j=a->i[i]; j<a->i[i+1]; j++) {
1589       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1590       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1591       aa = a->a + j*bs2;
1592       for (k=0; k<bs; k++) {
1593         for (l=0; l<bs; l++) {
1594           if (PetscRealPart(*aa++) >=  0.) continue;
1595           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1596         }
1597       }
1598     }
1599   }
1600   color = PETSC_DRAW_CYAN;
1601   for (i=0,row=0; i<mbs; i++,row+=bs) {
1602     for (j=a->i[i]; j<a->i[i+1]; j++) {
1603       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1604       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1605       aa = a->a + j*bs2;
1606       for (k=0; k<bs; k++) {
1607         for (l=0; l<bs; l++) {
1608           if (PetscRealPart(*aa++) != 0.) continue;
1609           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1610         }
1611       }
1612     }
1613   }
1614 
1615   color = PETSC_DRAW_RED;
1616   for (i=0,row=0; i<mbs; i++,row+=bs) {
1617     for (j=a->i[i]; j<a->i[i+1]; j++) {
1618       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1619       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620       aa = a->a + j*bs2;
1621       for (k=0; k<bs; k++) {
1622         for (l=0; l<bs; l++) {
1623           if (PetscRealPart(*aa++) <= 0.) continue;
1624           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625         }
1626       }
1627     }
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1634 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1635 {
1636   PetscErrorCode ierr;
1637   PetscReal      xl,yl,xr,yr,w,h;
1638   PetscDraw      draw;
1639   PetscTruth     isnull;
1640 
1641   PetscFunctionBegin;
1642 
1643   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1644   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1645 
1646   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1647   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1648   xr += w;    yr += h;  xl = -w;     yl = -h;
1649   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1650   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1651   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatView_SeqBAIJ"
1657 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1658 {
1659   PetscErrorCode ierr;
1660   PetscTruth     iascii,isbinary,isdraw;
1661 
1662   PetscFunctionBegin;
1663   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1664   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1666   if (iascii){
1667     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1668   } else if (isbinary) {
1669     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1670   } else if (isdraw) {
1671     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1672   } else {
1673     Mat B;
1674     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1675     ierr = MatView(B,viewer);CHKERRQ(ierr);
1676     ierr = MatDestroy(B);CHKERRQ(ierr);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1684 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1685 {
1686   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1687   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1688   PetscInt    *ai = a->i,*ailen = a->ilen;
1689   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1690   MatScalar   *ap,*aa = a->a;
1691 
1692   PetscFunctionBegin;
1693   for (k=0; k<m; k++) { /* loop over rows */
1694     row  = im[k]; brow = row/bs;
1695     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1696     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1697     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1698     nrow = ailen[brow];
1699     for (l=0; l<n; l++) { /* loop over columns */
1700       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1701       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1702       col  = in[l] ;
1703       bcol = col/bs;
1704       cidx = col%bs;
1705       ridx = row%bs;
1706       high = nrow;
1707       low  = 0; /* assume unsorted */
1708       while (high-low > 5) {
1709         t = (low+high)/2;
1710         if (rp[t] > bcol) high = t;
1711         else             low  = t;
1712       }
1713       for (i=low; i<high; i++) {
1714         if (rp[i] > bcol) break;
1715         if (rp[i] == bcol) {
1716           *v++ = ap[bs2*i+bs*cidx+ridx];
1717           goto finished;
1718         }
1719       }
1720       *v++ = 0.0;
1721       finished:;
1722     }
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #define CHUNKSIZE 10
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1730 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1731 {
1732   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1733   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1734   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1735   PetscErrorCode  ierr;
1736   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1737   PetscTruth      roworiented=a->roworiented;
1738   const MatScalar *value = v;
1739   MatScalar       *ap,*aa = a->a,*bap;
1740 
1741   PetscFunctionBegin;
1742   if (roworiented) {
1743     stepval = (n-1)*bs;
1744   } else {
1745     stepval = (m-1)*bs;
1746   }
1747   for (k=0; k<m; k++) { /* loop over added rows */
1748     row  = im[k];
1749     if (row < 0) continue;
1750 #if defined(PETSC_USE_DEBUG)
1751     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1752 #endif
1753     rp   = aj + ai[row];
1754     ap   = aa + bs2*ai[row];
1755     rmax = imax[row];
1756     nrow = ailen[row];
1757     low  = 0;
1758     high = nrow;
1759     for (l=0; l<n; l++) { /* loop over added columns */
1760       if (in[l] < 0) continue;
1761 #if defined(PETSC_USE_DEBUG)
1762       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1763 #endif
1764       col = in[l];
1765       if (roworiented) {
1766         value = v + k*(stepval+bs)*bs + l*bs;
1767       } else {
1768         value = v + l*(stepval+bs)*bs + k*bs;
1769       }
1770       if (col <= lastcol) low = 0; else high = nrow;
1771       lastcol = col;
1772       while (high-low > 7) {
1773         t = (low+high)/2;
1774         if (rp[t] > col) high = t;
1775         else             low  = t;
1776       }
1777       for (i=low; i<high; i++) {
1778         if (rp[i] > col) break;
1779         if (rp[i] == col) {
1780           bap  = ap +  bs2*i;
1781           if (roworiented) {
1782             if (is == ADD_VALUES) {
1783               for (ii=0; ii<bs; ii++,value+=stepval) {
1784                 for (jj=ii; jj<bs2; jj+=bs) {
1785                   bap[jj] += *value++;
1786                 }
1787               }
1788             } else {
1789               for (ii=0; ii<bs; ii++,value+=stepval) {
1790                 for (jj=ii; jj<bs2; jj+=bs) {
1791                   bap[jj] = *value++;
1792                 }
1793               }
1794             }
1795           } else {
1796             if (is == ADD_VALUES) {
1797               for (ii=0; ii<bs; ii++,value+=stepval) {
1798                 for (jj=0; jj<bs; jj++) {
1799                   *bap++ += *value++;
1800                 }
1801               }
1802             } else {
1803               for (ii=0; ii<bs; ii++,value+=stepval) {
1804                 for (jj=0; jj<bs; jj++) {
1805                   *bap++  = *value++;
1806                 }
1807               }
1808             }
1809           }
1810           goto noinsert2;
1811         }
1812       }
1813       if (nonew == 1) goto noinsert2;
1814       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1815       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1816       N = nrow++ - 1; high++;
1817       /* shift up all the later entries in this row */
1818       for (ii=N; ii>=i; ii--) {
1819         rp[ii+1] = rp[ii];
1820         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1821       }
1822       if (N >= i) {
1823         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1824       }
1825       rp[i] = col;
1826       bap   = ap +  bs2*i;
1827       if (roworiented) {
1828         for (ii=0; ii<bs; ii++,value+=stepval) {
1829           for (jj=ii; jj<bs2; jj+=bs) {
1830             bap[jj] = *value++;
1831           }
1832         }
1833       } else {
1834         for (ii=0; ii<bs; ii++,value+=stepval) {
1835           for (jj=0; jj<bs; jj++) {
1836             *bap++  = *value++;
1837           }
1838         }
1839       }
1840       noinsert2:;
1841       low = i;
1842     }
1843     ailen[row] = nrow;
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1850 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1851 {
1852   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1853   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1854   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1855   PetscErrorCode ierr;
1856   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1857   MatScalar      *aa = a->a,*ap;
1858   PetscReal      ratio=0.6;
1859 
1860   PetscFunctionBegin;
1861   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1862 
1863   if (m) rmax = ailen[0];
1864   for (i=1; i<mbs; i++) {
1865     /* move each row back by the amount of empty slots (fshift) before it*/
1866     fshift += imax[i-1] - ailen[i-1];
1867     rmax   = PetscMax(rmax,ailen[i]);
1868     if (fshift) {
1869       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1870       N = ailen[i];
1871       for (j=0; j<N; j++) {
1872         ip[j-fshift] = ip[j];
1873         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1874       }
1875     }
1876     ai[i] = ai[i-1] + ailen[i-1];
1877   }
1878   if (mbs) {
1879     fshift += imax[mbs-1] - ailen[mbs-1];
1880     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1881   }
1882   /* reset ilen and imax for each row */
1883   for (i=0; i<mbs; i++) {
1884     ailen[i] = imax[i] = ai[i+1] - ai[i];
1885   }
1886   a->nz = ai[mbs];
1887 
1888   /* diagonals may have moved, so kill the diagonal pointers */
1889   a->idiagvalid = PETSC_FALSE;
1890   if (fshift && a->diag) {
1891     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1892     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1893     a->diag = 0;
1894   }
1895   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1896   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1897   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1898   a->reallocs          = 0;
1899   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1900 
1901   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1902   if (a->compressedrow.use){
1903     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1904   }
1905 
1906   A->same_nonzero = PETSC_TRUE;
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 /*
1911    This function returns an array of flags which indicate the locations of contiguous
1912    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1913    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1914    Assume: sizes should be long enough to hold all the values.
1915 */
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1918 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1919 {
1920   PetscInt   i,j,k,row;
1921   PetscTruth flg;
1922 
1923   PetscFunctionBegin;
1924   for (i=0,j=0; i<n; j++) {
1925     row = idx[i];
1926     if (row%bs!=0) { /* Not the begining of a block */
1927       sizes[j] = 1;
1928       i++;
1929     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1930       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1931       i++;
1932     } else { /* Begining of the block, so check if the complete block exists */
1933       flg = PETSC_TRUE;
1934       for (k=1; k<bs; k++) {
1935         if (row+k != idx[i+k]) { /* break in the block */
1936           flg = PETSC_FALSE;
1937           break;
1938         }
1939       }
1940       if (flg) { /* No break in the bs */
1941         sizes[j] = bs;
1942         i+= bs;
1943       } else {
1944         sizes[j] = 1;
1945         i++;
1946       }
1947     }
1948   }
1949   *bs_max = j;
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1955 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1956 {
1957   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1958   PetscErrorCode ierr;
1959   PetscInt       i,j,k,count,*rows;
1960   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1961   PetscScalar    zero = 0.0;
1962   MatScalar      *aa;
1963 
1964   PetscFunctionBegin;
1965   /* Make a copy of the IS and  sort it */
1966   /* allocate memory for rows,sizes */
1967   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1968   sizes = rows + is_n;
1969 
1970   /* copy IS values to rows, and sort them */
1971   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1972   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1973   if (baij->keepzeroedrows) {
1974     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1975     bs_max = is_n;
1976     A->same_nonzero = PETSC_TRUE;
1977   } else {
1978     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1979     A->same_nonzero = PETSC_FALSE;
1980   }
1981 
1982   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1983     row   = rows[j];
1984     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1985     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1986     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1987     if (sizes[i] == bs && !baij->keepzeroedrows) {
1988       if (diag != 0.0) {
1989         if (baij->ilen[row/bs] > 0) {
1990           baij->ilen[row/bs]       = 1;
1991           baij->j[baij->i[row/bs]] = row/bs;
1992           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1993         }
1994         /* Now insert all the diagonal values for this bs */
1995         for (k=0; k<bs; k++) {
1996           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1997         }
1998       } else { /* (diag == 0.0) */
1999         baij->ilen[row/bs] = 0;
2000       } /* end (diag == 0.0) */
2001     } else { /* (sizes[i] != bs) */
2002 #if defined (PETSC_USE_DEBUG)
2003       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2004 #endif
2005       for (k=0; k<count; k++) {
2006         aa[0] =  zero;
2007         aa    += bs;
2008       }
2009       if (diag != 0.0) {
2010         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2011       }
2012     }
2013   }
2014 
2015   ierr = PetscFree(rows);CHKERRQ(ierr);
2016   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2022 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2023 {
2024   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2025   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2026   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2027   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2028   PetscErrorCode ierr;
2029   PetscInt       ridx,cidx,bs2=a->bs2;
2030   PetscTruth     roworiented=a->roworiented;
2031   MatScalar      *ap,value,*aa=a->a,*bap;
2032 
2033   PetscFunctionBegin;
2034   for (k=0; k<m; k++) { /* loop over added rows */
2035     row  = im[k];
2036     brow = row/bs;
2037     if (row < 0) continue;
2038 #if defined(PETSC_USE_DEBUG)
2039     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2040 #endif
2041     rp   = aj + ai[brow];
2042     ap   = aa + bs2*ai[brow];
2043     rmax = imax[brow];
2044     nrow = ailen[brow];
2045     low  = 0;
2046     high = nrow;
2047     for (l=0; l<n; l++) { /* loop over added columns */
2048       if (in[l] < 0) continue;
2049 #if defined(PETSC_USE_DEBUG)
2050       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2051 #endif
2052       col = in[l]; bcol = col/bs;
2053       ridx = row % bs; cidx = col % bs;
2054       if (roworiented) {
2055         value = v[l + k*n];
2056       } else {
2057         value = v[k + l*m];
2058       }
2059       if (col <= lastcol) low = 0; else high = nrow;
2060       lastcol = col;
2061       while (high-low > 7) {
2062         t = (low+high)/2;
2063         if (rp[t] > bcol) high = t;
2064         else              low  = t;
2065       }
2066       for (i=low; i<high; i++) {
2067         if (rp[i] > bcol) break;
2068         if (rp[i] == bcol) {
2069           bap  = ap +  bs2*i + bs*cidx + ridx;
2070           if (is == ADD_VALUES) *bap += value;
2071           else                  *bap  = value;
2072           goto noinsert1;
2073         }
2074       }
2075       if (nonew == 1) goto noinsert1;
2076       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2077       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2078       N = nrow++ - 1; high++;
2079       /* shift up all the later entries in this row */
2080       for (ii=N; ii>=i; ii--) {
2081         rp[ii+1] = rp[ii];
2082         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2083       }
2084       if (N>=i) {
2085         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2086       }
2087       rp[i]                      = bcol;
2088       ap[bs2*i + bs*cidx + ridx] = value;
2089       a->nz++;
2090       noinsert1:;
2091       low = i;
2092     }
2093     ailen[brow] = nrow;
2094   }
2095   A->same_nonzero = PETSC_FALSE;
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 
2100 #undef __FUNCT__
2101 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2102 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2103 {
2104   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2105   Mat            outA;
2106   PetscErrorCode ierr;
2107   PetscTruth     row_identity,col_identity;
2108 
2109   PetscFunctionBegin;
2110   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2111   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2112   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2113   if (!row_identity || !col_identity) {
2114     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2115   }
2116 
2117   outA          = inA;
2118   inA->factor   = FACTOR_LU;
2119 
2120   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2121 
2122   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2123   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2124   a->row = row;
2125   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2126   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2127   a->col = col;
2128 
2129   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2130   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2132 
2133   /*
2134       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2135       for ILU(0) factorization with natural ordering
2136   */
2137   if (inA->rmap.bs < 8) {
2138     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
2139   } else {
2140     if (!a->solve_work) {
2141       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2142       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2143     }
2144   }
2145 
2146   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2147 
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 EXTERN_C_BEGIN
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2154 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2155 {
2156   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2157   PetscInt    i,nz,nbs;
2158 
2159   PetscFunctionBegin;
2160   nz  = baij->maxnz/baij->bs2;
2161   nbs = baij->nbs;
2162   for (i=0; i<nz; i++) {
2163     baij->j[i] = indices[i];
2164   }
2165   baij->nz = nz;
2166   for (i=0; i<nbs; i++) {
2167     baij->ilen[i] = baij->imax[i];
2168   }
2169 
2170   PetscFunctionReturn(0);
2171 }
2172 EXTERN_C_END
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2176 /*@
2177     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2178        in the matrix.
2179 
2180   Input Parameters:
2181 +  mat - the SeqBAIJ matrix
2182 -  indices - the column indices
2183 
2184   Level: advanced
2185 
2186   Notes:
2187     This can be called if you have precomputed the nonzero structure of the
2188   matrix and want to provide it to the matrix object to improve the performance
2189   of the MatSetValues() operation.
2190 
2191     You MUST have set the correct numbers of nonzeros per row in the call to
2192   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2193 
2194     MUST be called before any calls to MatSetValues();
2195 
2196 @*/
2197 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2198 {
2199   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2200 
2201   PetscFunctionBegin;
2202   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2203   PetscValidPointer(indices,2);
2204   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2205   if (f) {
2206     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2207   } else {
2208     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2209   }
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2215 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2216 {
2217   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2218   PetscErrorCode ierr;
2219   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2220   PetscReal      atmp;
2221   PetscScalar    *x,zero = 0.0;
2222   MatScalar      *aa;
2223   PetscInt       ncols,brow,krow,kcol;
2224 
2225   PetscFunctionBegin;
2226   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2227   bs   = A->rmap.bs;
2228   aa   = a->a;
2229   ai   = a->i;
2230   aj   = a->j;
2231   mbs  = a->mbs;
2232 
2233   ierr = VecSet(v,zero);CHKERRQ(ierr);
2234   if (idx) {
2235     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
2236   }
2237   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2238   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2239   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2240   for (i=0; i<mbs; i++) {
2241     ncols = ai[1] - ai[0]; ai++;
2242     brow  = bs*i;
2243     for (j=0; j<ncols; j++){
2244       for (kcol=0; kcol<bs; kcol++){
2245         for (krow=0; krow<bs; krow++){
2246           atmp = PetscAbsScalar(*aa);aa++;
2247           row = brow + krow;    /* row index */
2248           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2249           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2250         }
2251       }
2252       aj++;
2253     }
2254   }
2255   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatCopy_SeqBAIJ"
2261 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2262 {
2263   PetscErrorCode ierr;
2264 
2265   PetscFunctionBegin;
2266   /* If the two matrices have the same copy implementation, use fast copy. */
2267   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2268     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2269     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2270 
2271     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2272       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2273     }
2274     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
2275   } else {
2276     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2277   }
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2283 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2284 {
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2294 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2295 {
2296   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2297   PetscFunctionBegin;
2298   *array = a->a;
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2304 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2305 {
2306   PetscFunctionBegin;
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #include "petscblaslapack.h"
2311 #undef __FUNCT__
2312 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2313 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2314 {
2315   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2316   PetscErrorCode ierr;
2317   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2318   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2319 
2320   PetscFunctionBegin;
2321   if (str == SAME_NONZERO_PATTERN) {
2322     PetscScalar alpha = a;
2323     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2324   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2325     if (y->xtoy && y->XtoY != X) {
2326       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2327       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2328     }
2329     if (!y->xtoy) { /* get xtoy */
2330       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2331       y->XtoY = X;
2332     }
2333     bs2 = bs*bs;
2334     for (i=0; i<x->nz; i++) {
2335       j = 0;
2336       while (j < bs2){
2337         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2338         j++;
2339       }
2340     }
2341     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2342   } else {
2343     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2344   }
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2350 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2351 {
2352   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2353   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2354   PetscScalar    *aa = a->a;
2355 
2356   PetscFunctionBegin;
2357   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2363 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2364 {
2365   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2366   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2367   PetscScalar    *aa = a->a;
2368 
2369   PetscFunctionBegin;
2370   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 
2375 /* -------------------------------------------------------------------*/
2376 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2377        MatGetRow_SeqBAIJ,
2378        MatRestoreRow_SeqBAIJ,
2379        MatMult_SeqBAIJ_N,
2380 /* 4*/ MatMultAdd_SeqBAIJ_N,
2381        MatMultTranspose_SeqBAIJ,
2382        MatMultTransposeAdd_SeqBAIJ,
2383        MatSolve_SeqBAIJ_N,
2384        0,
2385        0,
2386 /*10*/ 0,
2387        MatLUFactor_SeqBAIJ,
2388        0,
2389        0,
2390        MatTranspose_SeqBAIJ,
2391 /*15*/ MatGetInfo_SeqBAIJ,
2392        MatEqual_SeqBAIJ,
2393        MatGetDiagonal_SeqBAIJ,
2394        MatDiagonalScale_SeqBAIJ,
2395        MatNorm_SeqBAIJ,
2396 /*20*/ 0,
2397        MatAssemblyEnd_SeqBAIJ,
2398        0,
2399        MatSetOption_SeqBAIJ,
2400        MatZeroEntries_SeqBAIJ,
2401 /*25*/ MatZeroRows_SeqBAIJ,
2402        MatLUFactorSymbolic_SeqBAIJ,
2403        MatLUFactorNumeric_SeqBAIJ_N,
2404        MatCholeskyFactorSymbolic_SeqBAIJ,
2405        MatCholeskyFactorNumeric_SeqBAIJ_N,
2406 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2407        MatILUFactorSymbolic_SeqBAIJ,
2408        MatICCFactorSymbolic_SeqBAIJ,
2409        MatGetArray_SeqBAIJ,
2410        MatRestoreArray_SeqBAIJ,
2411 /*35*/ MatDuplicate_SeqBAIJ,
2412        0,
2413        0,
2414        MatILUFactor_SeqBAIJ,
2415        0,
2416 /*40*/ MatAXPY_SeqBAIJ,
2417        MatGetSubMatrices_SeqBAIJ,
2418        MatIncreaseOverlap_SeqBAIJ,
2419        MatGetValues_SeqBAIJ,
2420        MatCopy_SeqBAIJ,
2421 /*45*/ 0,
2422        MatScale_SeqBAIJ,
2423        0,
2424        0,
2425        0,
2426 /*50*/ 0,
2427        MatGetRowIJ_SeqBAIJ,
2428        MatRestoreRowIJ_SeqBAIJ,
2429        0,
2430        0,
2431 /*55*/ 0,
2432        0,
2433        0,
2434        0,
2435        MatSetValuesBlocked_SeqBAIJ,
2436 /*60*/ MatGetSubMatrix_SeqBAIJ,
2437        MatDestroy_SeqBAIJ,
2438        MatView_SeqBAIJ,
2439        0,
2440        0,
2441 /*65*/ 0,
2442        0,
2443        0,
2444        0,
2445        0,
2446 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2447        MatConvert_Basic,
2448        0,
2449        0,
2450        0,
2451 /*75*/ 0,
2452        0,
2453        0,
2454        0,
2455        0,
2456 /*80*/ 0,
2457        0,
2458        0,
2459        0,
2460        MatLoad_SeqBAIJ,
2461 /*85*/ 0,
2462        0,
2463        0,
2464        0,
2465        0,
2466 /*90*/ 0,
2467        0,
2468        0,
2469        0,
2470        0,
2471 /*95*/ 0,
2472        0,
2473        0,
2474        0,
2475        0,
2476 /*100*/0,
2477        0,
2478        0,
2479        0,
2480        0,
2481 /*105*/0,
2482        MatRealPart_SeqBAIJ,
2483        MatImaginaryPart_SeqBAIJ,
2484        0,
2485        0,
2486 /*110*/0,
2487        0,
2488        0,
2489        0,
2490        MatMissingDiagonal_SeqBAIJ
2491 /*115*/
2492 };
2493 
2494 EXTERN_C_BEGIN
2495 #undef __FUNCT__
2496 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2497 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2498 {
2499   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2500   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2501   PetscErrorCode ierr;
2502 
2503   PetscFunctionBegin;
2504   if (aij->nonew != 1) {
2505     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2506   }
2507 
2508   /* allocate space for values if not already there */
2509   if (!aij->saved_values) {
2510     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2511   }
2512 
2513   /* copy values over */
2514   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 EXTERN_C_END
2518 
2519 EXTERN_C_BEGIN
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2522 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2523 {
2524   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2525   PetscErrorCode ierr;
2526   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2527 
2528   PetscFunctionBegin;
2529   if (aij->nonew != 1) {
2530     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2531   }
2532   if (!aij->saved_values) {
2533     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2534   }
2535 
2536   /* copy values over */
2537   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2538   PetscFunctionReturn(0);
2539 }
2540 EXTERN_C_END
2541 
2542 EXTERN_C_BEGIN
2543 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2544 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2545 EXTERN_C_END
2546 
2547 EXTERN_C_BEGIN
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2550 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2551 {
2552   Mat_SeqBAIJ    *b;
2553   PetscErrorCode ierr;
2554   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2555   PetscTruth     flg,skipallocation = PETSC_FALSE;
2556 
2557   PetscFunctionBegin;
2558 
2559   if (nz == MAT_SKIP_ALLOCATION) {
2560     skipallocation = PETSC_TRUE;
2561     nz             = 0;
2562   }
2563 
2564   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2565     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2566   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2567 
2568   if (nnz && newbs != bs) {
2569     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2570   }
2571   bs   = newbs;
2572 
2573   B->rmap.bs = B->cmap.bs = bs;
2574   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2575   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2576 
2577   B->preallocated = PETSC_TRUE;
2578 
2579   mbs  = B->rmap.n/bs;
2580   nbs  = B->cmap.n/bs;
2581   bs2  = bs*bs;
2582 
2583   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2584     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2585   }
2586 
2587   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2588   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2589   if (nnz) {
2590     for (i=0; i<mbs; i++) {
2591       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2592       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2593     }
2594   }
2595 
2596   b       = (Mat_SeqBAIJ*)B->data;
2597   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2598     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2599   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2600 
2601   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2602   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2603   if (!flg) {
2604     switch (bs) {
2605     case 1:
2606       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2607       B->ops->mult            = MatMult_SeqBAIJ_1;
2608       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2609       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2610       break;
2611     case 2:
2612       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2613       B->ops->mult            = MatMult_SeqBAIJ_2;
2614       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2615       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2616       break;
2617     case 3:
2618       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2619       B->ops->mult            = MatMult_SeqBAIJ_3;
2620       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2621       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2622       break;
2623     case 4:
2624       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2625       B->ops->mult            = MatMult_SeqBAIJ_4;
2626       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2627       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2628       break;
2629     case 5:
2630       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2631       B->ops->mult            = MatMult_SeqBAIJ_5;
2632       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2633       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2634       break;
2635     case 6:
2636       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2637       B->ops->mult            = MatMult_SeqBAIJ_6;
2638       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2639       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2640       break;
2641     case 7:
2642       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2643       B->ops->mult            = MatMult_SeqBAIJ_7;
2644       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2645       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2646       break;
2647     default:
2648       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2649       B->ops->mult            = MatMult_SeqBAIJ_N;
2650       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2651       break;
2652     }
2653   }
2654   B->rmap.bs      = bs;
2655   b->mbs     = mbs;
2656   b->nbs     = nbs;
2657   if (!skipallocation) {
2658     if (!b->imax) {
2659       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2660     }
2661     /* b->ilen will count nonzeros in each block row so far. */
2662     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2663     if (!nnz) {
2664       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2665       else if (nz <= 0)        nz = 1;
2666       for (i=0; i<mbs; i++) b->imax[i] = nz;
2667       nz = nz*mbs;
2668     } else {
2669       nz = 0;
2670       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2671     }
2672 
2673     /* allocate the matrix space */
2674     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2675     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2676     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2677     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2678     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2679     b->singlemalloc = PETSC_TRUE;
2680     b->i[0] = 0;
2681     for (i=1; i<mbs+1; i++) {
2682       b->i[i] = b->i[i-1] + b->imax[i-1];
2683     }
2684     b->free_a     = PETSC_TRUE;
2685     b->free_ij    = PETSC_TRUE;
2686   } else {
2687     b->free_a     = PETSC_FALSE;
2688     b->free_ij    = PETSC_FALSE;
2689   }
2690 
2691   B->rmap.bs          = bs;
2692   b->bs2              = bs2;
2693   b->mbs              = mbs;
2694   b->nz               = 0;
2695   b->maxnz            = nz*bs2;
2696   B->info.nz_unneeded = (PetscReal)b->maxnz;
2697   PetscFunctionReturn(0);
2698 }
2699 EXTERN_C_END
2700 
2701 /*MC
2702    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2703    block sparse compressed row format.
2704 
2705    Options Database Keys:
2706 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2707 
2708   Level: beginner
2709 
2710 .seealso: MatCreateSeqBAIJ()
2711 M*/
2712 
2713 EXTERN_C_BEGIN
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatCreate_SeqBAIJ"
2716 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2717 {
2718   PetscErrorCode ierr;
2719   PetscMPIInt    size;
2720   Mat_SeqBAIJ    *b;
2721 
2722   PetscFunctionBegin;
2723   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2724   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2725 
2726   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2727   B->data = (void*)b;
2728   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2729   B->factor           = 0;
2730   B->mapping          = 0;
2731   b->row              = 0;
2732   b->col              = 0;
2733   b->icol             = 0;
2734   b->reallocs         = 0;
2735   b->saved_values     = 0;
2736 
2737   b->roworiented      = PETSC_TRUE;
2738   b->nonew            = 0;
2739   b->diag             = 0;
2740   b->solve_work       = 0;
2741   b->mult_work        = 0;
2742   B->spptr            = 0;
2743   B->info.nz_unneeded = (PetscReal)b->maxnz;
2744   b->keepzeroedrows   = PETSC_FALSE;
2745   b->xtoy              = 0;
2746   b->XtoY              = 0;
2747   b->compressedrow.use     = PETSC_FALSE;
2748   b->compressedrow.nrows   = 0;
2749   b->compressedrow.i       = PETSC_NULL;
2750   b->compressedrow.rindex  = PETSC_NULL;
2751   b->compressedrow.checked = PETSC_FALSE;
2752   B->same_nonzero          = PETSC_FALSE;
2753 
2754   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2755                                      "MatInvertBlockDiagonal_SeqBAIJ",
2756                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2757   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2758                                      "MatStoreValues_SeqBAIJ",
2759                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2760   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2761                                      "MatRetrieveValues_SeqBAIJ",
2762                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2763   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2764                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2765                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2767                                      "MatConvert_SeqBAIJ_SeqAIJ",
2768                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2770                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2771                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2773                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2774                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2775   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2776   PetscFunctionReturn(0);
2777 }
2778 EXTERN_C_END
2779 
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2782 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2783 {
2784   Mat            C;
2785   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2786   PetscErrorCode ierr;
2787   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2788 
2789   PetscFunctionBegin;
2790   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2791 
2792   *B = 0;
2793   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2794   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2795   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2796   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2797   c    = (Mat_SeqBAIJ*)C->data;
2798 
2799   C->rmap.N   = A->rmap.N;
2800   C->cmap.N   = A->cmap.N;
2801   C->rmap.bs  = A->rmap.bs;
2802   c->bs2 = a->bs2;
2803   c->mbs = a->mbs;
2804   c->nbs = a->nbs;
2805 
2806   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2807   for (i=0; i<mbs; i++) {
2808     c->imax[i] = a->imax[i];
2809     c->ilen[i] = a->ilen[i];
2810   }
2811 
2812   /* allocate the matrix space */
2813   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2814   c->singlemalloc = PETSC_TRUE;
2815   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2816   if (mbs > 0) {
2817     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2818     if (cpvalues == MAT_COPY_VALUES) {
2819       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2820     } else {
2821       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2822     }
2823   }
2824   c->roworiented = a->roworiented;
2825   c->nonew       = a->nonew;
2826 
2827   if (a->diag) {
2828     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2829     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2830     for (i=0; i<mbs; i++) {
2831       c->diag[i] = a->diag[i];
2832     }
2833   } else c->diag        = 0;
2834   c->nz                 = a->nz;
2835   c->maxnz              = a->maxnz;
2836   c->solve_work         = 0;
2837   c->mult_work          = 0;
2838   c->free_a             = PETSC_TRUE;
2839   c->free_ij            = PETSC_TRUE;
2840   C->preallocated       = PETSC_TRUE;
2841   C->assembled          = PETSC_TRUE;
2842 
2843   c->compressedrow.use     = a->compressedrow.use;
2844   c->compressedrow.nrows   = a->compressedrow.nrows;
2845   c->compressedrow.checked = a->compressedrow.checked;
2846   if ( a->compressedrow.checked && a->compressedrow.use){
2847     i = a->compressedrow.nrows;
2848     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2849     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2850     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2851     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2852   } else {
2853     c->compressedrow.use    = PETSC_FALSE;
2854     c->compressedrow.i      = PETSC_NULL;
2855     c->compressedrow.rindex = PETSC_NULL;
2856   }
2857   C->same_nonzero = A->same_nonzero;
2858   *B = C;
2859   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatLoad_SeqBAIJ"
2865 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2866 {
2867   Mat_SeqBAIJ    *a;
2868   Mat            B;
2869   PetscErrorCode ierr;
2870   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2871   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2872   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2873   PetscInt       *masked,nmask,tmp,bs2,ishift;
2874   PetscMPIInt    size;
2875   int            fd;
2876   PetscScalar    *aa;
2877   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2878 
2879   PetscFunctionBegin;
2880   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2881     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2882   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2883   bs2  = bs*bs;
2884 
2885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2886   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2887   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2888   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2889   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2890   M = header[1]; N = header[2]; nz = header[3];
2891 
2892   if (header[3] < 0) {
2893     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2894   }
2895 
2896   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2897 
2898   /*
2899      This code adds extra rows to make sure the number of rows is
2900     divisible by the blocksize
2901   */
2902   mbs        = M/bs;
2903   extra_rows = bs - M + bs*(mbs);
2904   if (extra_rows == bs) extra_rows = 0;
2905   else                  mbs++;
2906   if (extra_rows) {
2907     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2908   }
2909 
2910   /* read in row lengths */
2911   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2912   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2913   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2914 
2915   /* read in column indices */
2916   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2917   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2918   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2919 
2920   /* loop over row lengths determining block row lengths */
2921   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2922   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2923   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2924   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2925   masked   = mask + mbs;
2926   rowcount = 0; nzcount = 0;
2927   for (i=0; i<mbs; i++) {
2928     nmask = 0;
2929     for (j=0; j<bs; j++) {
2930       kmax = rowlengths[rowcount];
2931       for (k=0; k<kmax; k++) {
2932         tmp = jj[nzcount++]/bs;
2933         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2934       }
2935       rowcount++;
2936     }
2937     browlengths[i] += nmask;
2938     /* zero out the mask elements we set */
2939     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2940   }
2941 
2942   /* create our matrix */
2943   ierr = MatCreate(comm,&B);
2944   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2945   ierr = MatSetType(B,type);CHKERRQ(ierr);
2946   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2947   a = (Mat_SeqBAIJ*)B->data;
2948 
2949   /* set matrix "i" values */
2950   a->i[0] = 0;
2951   for (i=1; i<= mbs; i++) {
2952     a->i[i]      = a->i[i-1] + browlengths[i-1];
2953     a->ilen[i-1] = browlengths[i-1];
2954   }
2955   a->nz         = 0;
2956   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2957 
2958   /* read in nonzero values */
2959   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2960   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2961   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2962 
2963   /* set "a" and "j" values into matrix */
2964   nzcount = 0; jcount = 0;
2965   for (i=0; i<mbs; i++) {
2966     nzcountb = nzcount;
2967     nmask    = 0;
2968     for (j=0; j<bs; j++) {
2969       kmax = rowlengths[i*bs+j];
2970       for (k=0; k<kmax; k++) {
2971         tmp = jj[nzcount++]/bs;
2972 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2973       }
2974     }
2975     /* sort the masked values */
2976     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2977 
2978     /* set "j" values into matrix */
2979     maskcount = 1;
2980     for (j=0; j<nmask; j++) {
2981       a->j[jcount++]  = masked[j];
2982       mask[masked[j]] = maskcount++;
2983     }
2984     /* set "a" values into matrix */
2985     ishift = bs2*a->i[i];
2986     for (j=0; j<bs; j++) {
2987       kmax = rowlengths[i*bs+j];
2988       for (k=0; k<kmax; k++) {
2989         tmp       = jj[nzcountb]/bs ;
2990         block     = mask[tmp] - 1;
2991         point     = jj[nzcountb] - bs*tmp;
2992         idx       = ishift + bs2*block + j + bs*point;
2993         a->a[idx] = (MatScalar)aa[nzcountb++];
2994       }
2995     }
2996     /* zero out the mask elements we set */
2997     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2998   }
2999   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3000 
3001   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3002   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3003   ierr = PetscFree(aa);CHKERRQ(ierr);
3004   ierr = PetscFree(jj);CHKERRQ(ierr);
3005   ierr = PetscFree(mask);CHKERRQ(ierr);
3006 
3007   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3008   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3009   ierr = MatView_Private(B);CHKERRQ(ierr);
3010 
3011   *A = B;
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatCreateSeqBAIJ"
3017 /*@C
3018    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3019    compressed row) format.  For good matrix assembly performance the
3020    user should preallocate the matrix storage by setting the parameter nz
3021    (or the array nnz).  By setting these parameters accurately, performance
3022    during matrix assembly can be increased by more than a factor of 50.
3023 
3024    Collective on MPI_Comm
3025 
3026    Input Parameters:
3027 +  comm - MPI communicator, set to PETSC_COMM_SELF
3028 .  bs - size of block
3029 .  m - number of rows
3030 .  n - number of columns
3031 .  nz - number of nonzero blocks  per block row (same for all rows)
3032 -  nnz - array containing the number of nonzero blocks in the various block rows
3033          (possibly different for each block row) or PETSC_NULL
3034 
3035    Output Parameter:
3036 .  A - the matrix
3037 
3038    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3039    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3040    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3041    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3042 
3043    Options Database Keys:
3044 .   -mat_no_unroll - uses code that does not unroll the loops in the
3045                      block calculations (much slower)
3046 .    -mat_block_size - size of the blocks to use
3047 
3048    Level: intermediate
3049 
3050    Notes:
3051    The number of rows and columns must be divisible by blocksize.
3052 
3053    If the nnz parameter is given then the nz parameter is ignored
3054 
3055    A nonzero block is any block that as 1 or more nonzeros in it
3056 
3057    The block AIJ format is fully compatible with standard Fortran 77
3058    storage.  That is, the stored row and column indices can begin at
3059    either one (as in Fortran) or zero.  See the users' manual for details.
3060 
3061    Specify the preallocated storage with either nz or nnz (not both).
3062    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3063    allocation.  For additional details, see the users manual chapter on
3064    matrices.
3065 
3066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3067 @*/
3068 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3069 {
3070   PetscErrorCode ierr;
3071 
3072   PetscFunctionBegin;
3073   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3074   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3075   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3076   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3077   PetscFunctionReturn(0);
3078 }
3079 
3080 #undef __FUNCT__
3081 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3082 /*@C
3083    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3084    per row in the matrix. For good matrix assembly performance the
3085    user should preallocate the matrix storage by setting the parameter nz
3086    (or the array nnz).  By setting these parameters accurately, performance
3087    during matrix assembly can be increased by more than a factor of 50.
3088 
3089    Collective on MPI_Comm
3090 
3091    Input Parameters:
3092 +  A - the matrix
3093 .  bs - size of block
3094 .  nz - number of block nonzeros per block row (same for all rows)
3095 -  nnz - array containing the number of block nonzeros in the various block rows
3096          (possibly different for each block row) or PETSC_NULL
3097 
3098    Options Database Keys:
3099 .   -mat_no_unroll - uses code that does not unroll the loops in the
3100                      block calculations (much slower)
3101 .    -mat_block_size - size of the blocks to use
3102 
3103    Level: intermediate
3104 
3105    Notes:
3106    If the nnz parameter is given then the nz parameter is ignored
3107 
3108    You can call MatGetInfo() to get information on how effective the preallocation was;
3109    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3110    You can also run with the option -info and look for messages with the string
3111    malloc in them to see if additional memory allocation was needed.
3112 
3113    The block AIJ format is fully compatible with standard Fortran 77
3114    storage.  That is, the stored row and column indices can begin at
3115    either one (as in Fortran) or zero.  See the users' manual for details.
3116 
3117    Specify the preallocated storage with either nz or nnz (not both).
3118    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3119    allocation.  For additional details, see the users manual chapter on
3120    matrices.
3121 
3122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3123 @*/
3124 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3125 {
3126   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3127 
3128   PetscFunctionBegin;
3129   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3130   if (f) {
3131     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3132   }
3133   PetscFunctionReturn(0);
3134 }
3135 
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3138 /*@
3139      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3140               (upper triangular entries in CSR format) provided by the user.
3141 
3142      Collective on MPI_Comm
3143 
3144    Input Parameters:
3145 +  comm - must be an MPI communicator of size 1
3146 .  bs - size of block
3147 .  m - number of rows
3148 .  n - number of columns
3149 .  i - row indices
3150 .  j - column indices
3151 -  a - matrix values
3152 
3153    Output Parameter:
3154 .  mat - the matrix
3155 
3156    Level: intermediate
3157 
3158    Notes:
3159        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3160     once the matrix is destroyed
3161 
3162        You cannot set new nonzero locations into this matrix, that will generate an error.
3163 
3164        The i and j indices are 0 based
3165 
3166 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3167 
3168 @*/
3169 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3170 {
3171   PetscErrorCode ierr;
3172   PetscInt       ii;
3173   Mat_SeqBAIJ    *baij;
3174 
3175   PetscFunctionBegin;
3176   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3177   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3178 
3179   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3180   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3181   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3182   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3183   baij = (Mat_SeqBAIJ*)(*mat)->data;
3184   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3185 
3186   baij->i = i;
3187   baij->j = j;
3188   baij->a = a;
3189   baij->singlemalloc = PETSC_FALSE;
3190   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3191   baij->free_a       = PETSC_FALSE;
3192   baij->free_ij       = PETSC_FALSE;
3193 
3194   for (ii=0; ii<m; ii++) {
3195     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3196 #if defined(PETSC_USE_DEBUG)
3197     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3198 #endif
3199   }
3200 #if defined(PETSC_USE_DEBUG)
3201   for (ii=0; ii<baij->i[m]; ii++) {
3202     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3203     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3204   }
3205 #endif
3206 
3207   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3208   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3209   PetscFunctionReturn(0);
3210 }
3211