xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a77337e4d7d5143cd577a9cca2c72dcc9694336d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
118 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,s1;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0];
146       i2     = 1;
147       idiag += 1;
148       for (i=1; i<m; i++) {
149         v     = aa + ai[i];
150         vi    = aj + ai[i];
151         nz    = diag[i] - ai[i];
152         s1    = b[i2];
153         while (nz--) {
154           idx  = (*vi++);
155           x1   = x[idx];
156           s1  -= v[0]*x1;
157           v   += 1;
158         }
159         x[i2]   = idiag[0]*s1;
160         idiag   += 1;
161         i2      += 1;
162       }
163       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
164       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
165     }
166     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
167         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
168       i2    = 0;
169       mdiag = a->idiag+a->mbs;
170       for (i=0; i<m; i++) {
171         x1      = x[i2];
172         x[i2]   = mdiag[0]*x1;
173         mdiag  += 1;
174         i2     += 1;
175       }
176       ierr = PetscLogFlops(m);CHKERRQ(ierr);
177     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
179     }
180     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
181       idiag   = a->idiag+a->mbs - 1;
182       i2      = m - 1;
183       x1      = x[i2];
184       x[i2]   = idiag[0]*x1;
185       idiag -= 1;
186       i2    -= 1;
187       for (i=m-2; i>=0; i--) {
188         v     = aa + (diag[i]+1);
189         vi    = aj + diag[i] + 1;
190         nz    = ai[i+1] - diag[i] - 1;
191         s1    = x[i2];
192         while (nz--) {
193           idx  = (*vi++);
194           x1   = x[idx];
195           s1  -= v[0]*x1;
196           v   += 1;
197         }
198         x[i2]   = idiag[0]*s1;
199         idiag   -= 2;
200         i2      -= 1;
201       }
202       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
203     }
204   } else {
205     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
206   }
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
214 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215 {
216   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
217   PetscScalar        *x,x1,x2,s1,s2;
218   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
219   PetscErrorCode     ierr;
220   PetscInt           m = a->mbs,i,i2,nz,idx;
221   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
222 
223   PetscFunctionBegin;
224   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
225   its = its*lits;
226   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
227   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
228   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
229   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
230   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
231 
232   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
233 
234   diag  = a->diag;
235   idiag = a->idiag;
236   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
237   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
241       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
242       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
243       i2     = 2;
244       idiag += 4;
245       for (i=1; i<m; i++) {
246 	v     = aa + 4*ai[i];
247 	vi    = aj + ai[i];
248 	nz    = diag[i] - ai[i];
249 	s1    = b[i2]; s2 = b[i2+1];
250 	while (nz--) {
251 	  idx  = 2*(*vi++);
252 	  x1   = x[idx]; x2 = x[1+idx];
253 	  s1  -= v[0]*x1 + v[2]*x2;
254 	  s2  -= v[1]*x1 + v[3]*x2;
255 	  v   += 4;
256 	}
257 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
258 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
259         idiag   += 4;
260         i2      += 2;
261       }
262       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
263       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
264     }
265     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
266         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
267       i2    = 0;
268       mdiag = a->idiag+4*a->mbs;
269       for (i=0; i<m; i++) {
270         x1      = x[i2]; x2 = x[i2+1];
271         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
272         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
273         mdiag  += 4;
274         i2     += 2;
275       }
276       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
277     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
278       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
279     }
280     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
281       idiag   = a->idiag+4*a->mbs - 4;
282       i2      = 2*m - 2;
283       x1      = x[i2]; x2 = x[i2+1];
284       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
285       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
286       idiag -= 4;
287       i2    -= 2;
288       for (i=m-2; i>=0; i--) {
289 	v     = aa + 4*(diag[i]+1);
290 	vi    = aj + diag[i] + 1;
291 	nz    = ai[i+1] - diag[i] - 1;
292 	s1    = x[i2]; s2 = x[i2+1];
293 	while (nz--) {
294 	  idx  = 2*(*vi++);
295 	  x1   = x[idx]; x2 = x[1+idx];
296 	  s1  -= v[0]*x1 + v[2]*x2;
297 	  s2  -= v[1]*x1 + v[3]*x2;
298 	  v   += 4;
299 	}
300 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
301 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
302         idiag   -= 4;
303         i2      -= 2;
304       }
305       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
306     }
307   } else {
308     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309   }
310   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
311   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
317 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
318 {
319   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
320   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
321   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
322   PetscErrorCode     ierr;
323   PetscInt           m = a->mbs,i,i2,nz,idx;
324   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
325 
326   PetscFunctionBegin;
327   its = its*lits;
328   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
329   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
334 
335   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
336 
337   diag  = a->diag;
338   idiag = a->idiag;
339   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
341 
342   if (flag & SOR_ZERO_INITIAL_GUESS) {
343     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
344       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347       i2     = 3;
348       idiag += 9;
349       for (i=1; i<m; i++) {
350         v     = aa + 9*ai[i];
351         vi    = aj + ai[i];
352         nz    = diag[i] - ai[i];
353         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354         while (nz--) {
355           idx  = 3*(*vi++);
356           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360           v   += 9;
361         }
362         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365         idiag   += 9;
366         i2      += 3;
367       }
368       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
370     }
371     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373       i2    = 0;
374       mdiag = a->idiag+9*a->mbs;
375       for (i=0; i<m; i++) {
376         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380         mdiag  += 9;
381         i2     += 3;
382       }
383       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
384     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
386     }
387     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
388       idiag   = a->idiag+9*a->mbs - 9;
389       i2      = 3*m - 3;
390       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394       idiag -= 9;
395       i2    -= 3;
396       for (i=m-2; i>=0; i--) {
397         v     = aa + 9*(diag[i]+1);
398         vi    = aj + diag[i] + 1;
399         nz    = ai[i+1] - diag[i] - 1;
400         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401         while (nz--) {
402           idx  = 3*(*vi++);
403           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407           v   += 9;
408         }
409         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412         idiag   -= 9;
413         i2      -= 3;
414       }
415       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
416     }
417   } else {
418     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
419   }
420   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
421   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
427 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
428 {
429   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
430   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
431   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
432   PetscErrorCode     ierr;
433   PetscInt           m = a->mbs,i,i2,nz,idx;
434   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
435 
436   PetscFunctionBegin;
437   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
438   its = its*lits;
439   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
440   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
441   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
442   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
443   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
444 
445   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
446 
447   diag  = a->diag;
448   idiag = a->idiag;
449   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
451 
452   if (flag & SOR_ZERO_INITIAL_GUESS) {
453     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
454       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
455       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
456       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
457       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
458       i2     = 4;
459       idiag += 16;
460       for (i=1; i<m; i++) {
461 	v     = aa + 16*ai[i];
462 	vi    = aj + ai[i];
463 	nz    = diag[i] - ai[i];
464 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
465 	while (nz--) {
466 	  idx  = 4*(*vi++);
467 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
468 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
469 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
470 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
471 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
472 	  v   += 16;
473 	}
474 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
475 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
476 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
477 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
478         idiag   += 16;
479         i2      += 4;
480       }
481       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
482       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
483     }
484     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
485         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
486       i2    = 0;
487       mdiag = a->idiag+16*a->mbs;
488       for (i=0; i<m; i++) {
489         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
490         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
491         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
492         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
493         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
494         mdiag  += 16;
495         i2     += 4;
496       }
497       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
498     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
499       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
500     }
501     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
502       idiag   = a->idiag+16*a->mbs - 16;
503       i2      = 4*m - 4;
504       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
505       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
506       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
507       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
508       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
509       idiag -= 16;
510       i2    -= 4;
511       for (i=m-2; i>=0; i--) {
512 	v     = aa + 16*(diag[i]+1);
513 	vi    = aj + diag[i] + 1;
514 	nz    = ai[i+1] - diag[i] - 1;
515 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
516 	while (nz--) {
517 	  idx  = 4*(*vi++);
518 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
519 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
520 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
521 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
522 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
523 	  v   += 16;
524 	}
525 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
526 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
527 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
528 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
529         idiag   -= 16;
530         i2      -= 4;
531       }
532       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
533     }
534   } else {
535     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
536   }
537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
538   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
544 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
545 {
546   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
547   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
548   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
549   PetscErrorCode     ierr;
550   PetscInt           m = a->mbs,i,i2,nz,idx;
551   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
552 
553   PetscFunctionBegin;
554   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
555   its = its*lits;
556   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
557   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
558   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
559   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
560   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
561 
562   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
563 
564   diag  = a->diag;
565   idiag = a->idiag;
566   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
567   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
568 
569   if (flag & SOR_ZERO_INITIAL_GUESS) {
570     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
571       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
572       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
573       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
574       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
575       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
576       i2     = 5;
577       idiag += 25;
578       for (i=1; i<m; i++) {
579 	v     = aa + 25*ai[i];
580 	vi    = aj + ai[i];
581 	nz    = diag[i] - ai[i];
582 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
583 	while (nz--) {
584 	  idx  = 5*(*vi++);
585 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
586 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
587 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
588 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
589 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
590 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
591 	  v   += 25;
592 	}
593 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
594 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
595 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
596 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
597 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
598         idiag   += 25;
599         i2      += 5;
600       }
601       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
602       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
603     }
604     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
605         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
606       i2    = 0;
607       mdiag = a->idiag+25*a->mbs;
608       for (i=0; i<m; i++) {
609         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
610         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
611         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
612         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
613         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
614         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
615         mdiag  += 25;
616         i2     += 5;
617       }
618       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
619     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
620       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
621     }
622     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
623       idiag   = a->idiag+25*a->mbs - 25;
624       i2      = 5*m - 5;
625       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
626       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
627       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
628       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
629       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
630       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
631       idiag -= 25;
632       i2    -= 5;
633       for (i=m-2; i>=0; i--) {
634 	v     = aa + 25*(diag[i]+1);
635 	vi    = aj + diag[i] + 1;
636 	nz    = ai[i+1] - diag[i] - 1;
637 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
638 	while (nz--) {
639 	  idx  = 5*(*vi++);
640 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
641 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
642 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
643 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
644 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
645 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
646 	  v   += 25;
647 	}
648 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
649 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
650 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
651 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
652 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
653         idiag   -= 25;
654         i2      -= 5;
655       }
656       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
657     }
658   } else {
659     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
660   }
661   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
662   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
668 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
669 {
670   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
671   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
672   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
673   PetscErrorCode     ierr;
674   PetscInt           m = a->mbs,i,i2,nz,idx;
675   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
676 
677   PetscFunctionBegin;
678   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
679   its = its*lits;
680   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
681   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
682   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
683   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
684   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
685 
686   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
687 
688   diag  = a->diag;
689   idiag = a->idiag;
690   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
691   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
692 
693   if (flag & SOR_ZERO_INITIAL_GUESS) {
694     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
695       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
696       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
697       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
698       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
699       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
700       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
701       i2     = 6;
702       idiag += 36;
703       for (i=1; i<m; i++) {
704         v     = aa + 36*ai[i];
705         vi    = aj + ai[i];
706         nz    = diag[i] - ai[i];
707         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
708         while (nz--) {
709           idx  = 6*(*vi++);
710           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
711           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
712           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
713           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
714           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
715           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
716           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
717           v   += 36;
718         }
719         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
720         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
721         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
722         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
723         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
724         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
725         idiag   += 36;
726         i2      += 6;
727       }
728       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
729       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
730     }
731     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
732         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
733       i2    = 0;
734       mdiag = a->idiag+36*a->mbs;
735       for (i=0; i<m; i++) {
736         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
737         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
738         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
739         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
740         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
741         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
742         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
743         mdiag  += 36;
744         i2     += 6;
745       }
746       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
747     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
749     }
750     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
751       idiag   = a->idiag+36*a->mbs - 36;
752       i2      = 6*m - 6;
753       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
754       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
755       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
756       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
757       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
758       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
759       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
760       idiag -= 36;
761       i2    -= 6;
762       for (i=m-2; i>=0; i--) {
763         v     = aa + 36*(diag[i]+1);
764         vi    = aj + diag[i] + 1;
765         nz    = ai[i+1] - diag[i] - 1;
766         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
767         while (nz--) {
768           idx  = 6*(*vi++);
769           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
770           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
771           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
772           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
773           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
774           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
775           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
776           v   += 36;
777         }
778         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
779         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
780         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
781         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
782         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
783         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
784         idiag   -= 36;
785         i2      -= 6;
786       }
787       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
788     }
789   } else {
790     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
791   }
792   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
793   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
799 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
800 {
801   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
802   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
803   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
804   PetscErrorCode     ierr;
805   PetscInt           m = a->mbs,i,i2,nz,idx;
806   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
807 
808   PetscFunctionBegin;
809   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
810   its = its*lits;
811   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
812   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
813   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
814   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
815   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
816 
817   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
818 
819   diag  = a->diag;
820   idiag = a->idiag;
821   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
822   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
823 
824   if (flag & SOR_ZERO_INITIAL_GUESS) {
825     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
826       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
827       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
828       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
829       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
830       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
831       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
832       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
833       i2     = 7;
834       idiag += 49;
835       for (i=1; i<m; i++) {
836         v     = aa + 49*ai[i];
837         vi    = aj + ai[i];
838         nz    = diag[i] - ai[i];
839         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
840         while (nz--) {
841           idx  = 7*(*vi++);
842           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
843           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
844           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
845           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
846           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
847           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
848           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
849           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
850           v   += 49;
851         }
852         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
853         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
854         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
855         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
856         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
857         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
858         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
859         idiag   += 49;
860         i2      += 7;
861       }
862       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
863       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
864     }
865     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
866         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
867       i2    = 0;
868       mdiag = a->idiag+49*a->mbs;
869       for (i=0; i<m; i++) {
870         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
871         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
872         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
873         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
874         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
875         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
876         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
877         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
878         mdiag  += 36;
879         i2     += 6;
880       }
881       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
882     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
883       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
884     }
885     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
886       idiag   = a->idiag+49*a->mbs - 49;
887       i2      = 7*m - 7;
888       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
889       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
890       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
891       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
892       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
893       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
894       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
895       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
896       idiag -= 49;
897       i2    -= 7;
898       for (i=m-2; i>=0; i--) {
899         v     = aa + 49*(diag[i]+1);
900         vi    = aj + diag[i] + 1;
901         nz    = ai[i+1] - diag[i] - 1;
902         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
903         while (nz--) {
904           idx  = 7*(*vi++);
905           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
906           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
907           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
908           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
909           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
910           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
911           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
912           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
913           v   += 49;
914         }
915         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
916         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
917         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
918         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
919         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
920         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
921         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
922         idiag   -= 49;
923         i2      -= 7;
924       }
925       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
926     }
927   } else {
928     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
929   }
930   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
931   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
932   PetscFunctionReturn(0);
933 }
934 
935 /*
936     Special version for direct calls from Fortran (Used in PETSc-fun3d)
937 */
938 #if defined(PETSC_HAVE_FORTRAN_CAPS)
939 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
940 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
941 #define matsetvaluesblocked4_ matsetvaluesblocked4
942 #endif
943 
944 EXTERN_C_BEGIN
945 #undef __FUNCT__
946 #define __FUNCT__ "matsetvaluesblocked4_"
947 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
948 {
949   Mat               A = *AA;
950   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
951   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
952   PetscInt          *ai=a->i,*ailen=a->ilen;
953   PetscInt          *aj=a->j,stepval,lastcol = -1;
954   const PetscScalar *value = v;
955   MatScalar         *ap,*aa = a->a,*bap;
956 
957   PetscFunctionBegin;
958   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
959   stepval = (n-1)*4;
960   for (k=0; k<m; k++) { /* loop over added rows */
961     row  = im[k];
962     rp   = aj + ai[row];
963     ap   = aa + 16*ai[row];
964     nrow = ailen[row];
965     low  = 0;
966     high = nrow;
967     for (l=0; l<n; l++) { /* loop over added columns */
968       col = in[l];
969       if (col <= lastcol) low = 0; else high = nrow;
970       lastcol = col;
971       value = v + k*(stepval+4 + l)*4;
972       while (high-low > 7) {
973         t = (low+high)/2;
974         if (rp[t] > col) high = t;
975         else             low  = t;
976       }
977       for (i=low; i<high; i++) {
978         if (rp[i] > col) break;
979         if (rp[i] == col) {
980           bap  = ap +  16*i;
981           for (ii=0; ii<4; ii++,value+=stepval) {
982             for (jj=ii; jj<16; jj+=4) {
983               bap[jj] += *value++;
984             }
985           }
986           goto noinsert2;
987         }
988       }
989       N = nrow++ - 1;
990       high++; /* added new column index thus must search to one higher than before */
991       /* shift up all the later entries in this row */
992       for (ii=N; ii>=i; ii--) {
993         rp[ii+1] = rp[ii];
994         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
995       }
996       if (N >= i) {
997         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
998       }
999       rp[i] = col;
1000       bap   = ap +  16*i;
1001       for (ii=0; ii<4; ii++,value+=stepval) {
1002         for (jj=ii; jj<16; jj+=4) {
1003           bap[jj] = *value++;
1004         }
1005       }
1006       noinsert2:;
1007       low = i;
1008     }
1009     ailen[row] = nrow;
1010   }
1011   PetscFunctionReturnVoid();
1012 }
1013 EXTERN_C_END
1014 
1015 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1016 #define matsetvalues4_ MATSETVALUES4
1017 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1018 #define matsetvalues4_ matsetvalues4
1019 #endif
1020 
1021 EXTERN_C_BEGIN
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatSetValues4_"
1024 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1025 {
1026   Mat         A = *AA;
1027   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1028   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1029   PetscInt    *ai=a->i,*ailen=a->ilen;
1030   PetscInt    *aj=a->j,brow,bcol;
1031   PetscInt    ridx,cidx,lastcol = -1;
1032   MatScalar   *ap,value,*aa=a->a,*bap;
1033 
1034   PetscFunctionBegin;
1035   for (k=0; k<m; k++) { /* loop over added rows */
1036     row  = im[k]; brow = row/4;
1037     rp   = aj + ai[brow];
1038     ap   = aa + 16*ai[brow];
1039     nrow = ailen[brow];
1040     low  = 0;
1041     high = nrow;
1042     for (l=0; l<n; l++) { /* loop over added columns */
1043       col = in[l]; bcol = col/4;
1044       ridx = row % 4; cidx = col % 4;
1045       value = v[l + k*n];
1046       if (col <= lastcol) low = 0; else high = nrow;
1047       lastcol = col;
1048       while (high-low > 7) {
1049         t = (low+high)/2;
1050         if (rp[t] > bcol) high = t;
1051         else              low  = t;
1052       }
1053       for (i=low; i<high; i++) {
1054         if (rp[i] > bcol) break;
1055         if (rp[i] == bcol) {
1056           bap  = ap +  16*i + 4*cidx + ridx;
1057           *bap += value;
1058           goto noinsert1;
1059         }
1060       }
1061       N = nrow++ - 1;
1062       high++; /* added new column thus must search to one higher than before */
1063       /* shift up all the later entries in this row */
1064       for (ii=N; ii>=i; ii--) {
1065         rp[ii+1] = rp[ii];
1066         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1067       }
1068       if (N>=i) {
1069         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1070       }
1071       rp[i]                    = bcol;
1072       ap[16*i + 4*cidx + ridx] = value;
1073       noinsert1:;
1074       low = i;
1075     }
1076     ailen[brow] = nrow;
1077   }
1078   PetscFunctionReturnVoid();
1079 }
1080 EXTERN_C_END
1081 
1082 /*  UGLY, ugly, ugly
1083    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
1084    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
1085    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
1086    converts the entries into single precision and then calls ..._MatScalar() to put them
1087    into the single precision data structures.
1088 */
1089 #if defined(PETSC_USE_MAT_SINGLE)
1090 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
1091 #else
1092 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
1093 #endif
1094 
1095 #define CHUNKSIZE  10
1096 
1097 /*
1098      Checks for missing diagonals
1099 */
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1102 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1103 {
1104   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       *diag,*jj = a->j,i;
1107 
1108   PetscFunctionBegin;
1109   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1110   *missing = PETSC_FALSE;
1111   diag     = a->diag;
1112   for (i=0; i<a->mbs; i++) {
1113     if (jj[diag[i]] != i) {
1114       *missing  = PETSC_TRUE;
1115       if (d) *d = i;
1116     }
1117   }
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124 {
1125   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1126   PetscErrorCode ierr;
1127   PetscInt       i,j,m = a->mbs;
1128 
1129   PetscFunctionBegin;
1130   if (!a->diag) {
1131     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1132   }
1133   for (i=0; i<m; i++) {
1134     a->diag[i] = a->i[i+1];
1135     for (j=a->i[i]; j<a->i[i+1]; j++) {
1136       if (a->j[j] == i) {
1137         a->diag[i] = j;
1138         break;
1139       }
1140     }
1141   }
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 
1146 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1150 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1151 {
1152   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1153   PetscErrorCode ierr;
1154   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1155   PetscInt       *tia, *tja;
1156 
1157   PetscFunctionBegin;
1158   *nn = n;
1159   if (!ia) PetscFunctionReturn(0);
1160   if (symmetric) {
1161     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1162   } else {
1163     tia = a->i; tja = a->j;
1164   }
1165 
1166   if (!blockcompressed && bs > 1) {
1167     (*nn) *= bs;
1168     nbs    = bs;
1169     /* malloc & create the natural set of indices */
1170     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1171     if (n) {
1172       (*ia)[0] = 0;
1173       for (j=1; j<bs; j++) {
1174         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1175       }
1176     }
1177 
1178     for (i=1; i<n; i++) {
1179       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1180       for (j=1; j<bs; j++) {
1181         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1182       }
1183     }
1184     if (n) {
1185       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1186     }
1187 
1188     if (ja) {
1189       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1190       cnt = 0;
1191       for (i=0; i<n; i++) {
1192         for (j=0; j<bs; j++) {
1193           for (k=tia[i]; k<tia[i+1]; k++) {
1194             for (l=0; l<bs; l++) {
1195               (*ja)[cnt++] = bs*tja[k] + l;
1196 	    }
1197           }
1198         }
1199       }
1200     }
1201 
1202     n     *= bs;
1203     nz *= bs*bs;
1204     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1205       ierr = PetscFree(tia);CHKERRQ(ierr);
1206       ierr = PetscFree(tja);CHKERRQ(ierr);
1207     }
1208   } else {
1209     *ia = tia;
1210     if (ja) *ja = tja;
1211   }
1212   if (oshift == 1) {
1213     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1214     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1215   }
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1221 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1222 {
1223   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1224   PetscErrorCode ierr;
1225   PetscInt       i,n = a->mbs,nz = a->i[n];
1226 
1227   PetscFunctionBegin;
1228   if (!ia) PetscFunctionReturn(0);
1229   if (!blockcompressed && A->rmap.bs > 1) {
1230     ierr = PetscFree(*ia);CHKERRQ(ierr);
1231     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1232   } else if (symmetric) {
1233     ierr = PetscFree(*ia);CHKERRQ(ierr);
1234     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1235   } else if (oshift == 1) { /* blockcompressed */
1236     for (i=0; i<n+1; i++) a->i[i]--;
1237     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1244 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1245 {
1246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250 #if defined(PETSC_USE_LOG)
1251   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1252 #endif
1253   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1254   if (a->row) {
1255     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1256   }
1257   if (a->col) {
1258     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1259   }
1260   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1261   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1262   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1263   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1264   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1265   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1266   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1267 #if defined(PETSC_USE_MAT_SINGLE)
1268   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
1269 #endif
1270   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1271   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1272 
1273   ierr = PetscFree(a);CHKERRQ(ierr);
1274 
1275   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1288 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1289 {
1290   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1291   PetscErrorCode ierr;
1292 
1293   PetscFunctionBegin;
1294   switch (op) {
1295   case MAT_ROW_ORIENTED:
1296     a->roworiented    = flg;
1297     break;
1298   case MAT_KEEP_ZEROED_ROWS:
1299     a->keepzeroedrows = flg;
1300     break;
1301   case MAT_NEW_NONZERO_LOCATIONS:
1302     a->nonew          = (flg ? 0 : 1);
1303     break;
1304   case MAT_NEW_NONZERO_LOCATION_ERR:
1305     a->nonew          = (flg ? -1 : 0);
1306     break;
1307   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1308     a->nonew          = (flg ? -2 : 0);
1309     break;
1310   case MAT_NEW_DIAGONALS:
1311   case MAT_IGNORE_OFF_PROC_ENTRIES:
1312   case MAT_USE_HASH_TABLE:
1313     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1314     break;
1315   case MAT_SYMMETRIC:
1316   case MAT_STRUCTURALLY_SYMMETRIC:
1317   case MAT_HERMITIAN:
1318   case MAT_SYMMETRY_ETERNAL:
1319     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1320     break;
1321   default:
1322     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1323   }
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1329 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1330 {
1331   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1332   PetscErrorCode ierr;
1333   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1334   MatScalar      *aa,*aa_i;
1335   PetscScalar    *v_i;
1336 
1337   PetscFunctionBegin;
1338   bs  = A->rmap.bs;
1339   ai  = a->i;
1340   aj  = a->j;
1341   aa  = a->a;
1342   bs2 = a->bs2;
1343 
1344   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1345 
1346   bn  = row/bs;   /* Block number */
1347   bp  = row % bs; /* Block Position */
1348   M   = ai[bn+1] - ai[bn];
1349   *nz = bs*M;
1350 
1351   if (v) {
1352     *v = 0;
1353     if (*nz) {
1354       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1355       for (i=0; i<M; i++) { /* for each block in the block row */
1356         v_i  = *v + i*bs;
1357         aa_i = aa + bs2*(ai[bn] + i);
1358         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1359       }
1360     }
1361   }
1362 
1363   if (idx) {
1364     *idx = 0;
1365     if (*nz) {
1366       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1367       for (i=0; i<M; i++) { /* for each block in the block row */
1368         idx_i = *idx + i*bs;
1369         itmp  = bs*aj[ai[bn] + i];
1370         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1371       }
1372     }
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1379 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380 {
1381   PetscErrorCode ierr;
1382 
1383   PetscFunctionBegin;
1384   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1385   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1391 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1392 {
1393   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1394   Mat            C;
1395   PetscErrorCode ierr;
1396   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1397   PetscInt       *rows,*cols,bs2=a->bs2;
1398   PetscScalar    *array;
1399 
1400   PetscFunctionBegin;
1401   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1402   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1403   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1404 
1405 #if defined(PETSC_USE_MAT_SINGLE)
1406   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1407   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1408 #else
1409   array = a->a;
1410 #endif
1411 
1412   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1413   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1414   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1415   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1416   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1417   ierr = PetscFree(col);CHKERRQ(ierr);
1418   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1419   cols = rows + bs;
1420   for (i=0; i<mbs; i++) {
1421     cols[0] = i*bs;
1422     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1423     len = ai[i+1] - ai[i];
1424     for (j=0; j<len; j++) {
1425       rows[0] = (*aj++)*bs;
1426       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1427       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1428       array += bs2;
1429     }
1430   }
1431   ierr = PetscFree(rows);CHKERRQ(ierr);
1432 #if defined(PETSC_USE_MAT_SINGLE)
1433   ierr = PetscFree(array);
1434 #endif
1435 
1436   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1437   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1438 
1439   if (B) {
1440     *B = C;
1441   } else {
1442     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1443   }
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1449 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1450 {
1451   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1452   PetscErrorCode ierr;
1453   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1454   int            fd;
1455   PetscScalar    *aa;
1456   FILE           *file;
1457 
1458   PetscFunctionBegin;
1459   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1460   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1461   col_lens[0] = MAT_FILE_COOKIE;
1462 
1463   col_lens[1] = A->rmap.N;
1464   col_lens[2] = A->cmap.n;
1465   col_lens[3] = a->nz*bs2;
1466 
1467   /* store lengths of each row and write (including header) to file */
1468   count = 0;
1469   for (i=0; i<a->mbs; i++) {
1470     for (j=0; j<bs; j++) {
1471       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1472     }
1473   }
1474   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1475   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1476 
1477   /* store column indices (zero start index) */
1478   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1479   count = 0;
1480   for (i=0; i<a->mbs; i++) {
1481     for (j=0; j<bs; j++) {
1482       for (k=a->i[i]; k<a->i[i+1]; k++) {
1483         for (l=0; l<bs; l++) {
1484           jj[count++] = bs*a->j[k] + l;
1485         }
1486       }
1487     }
1488   }
1489   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1490   ierr = PetscFree(jj);CHKERRQ(ierr);
1491 
1492   /* store nonzero values */
1493   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1494   count = 0;
1495   for (i=0; i<a->mbs; i++) {
1496     for (j=0; j<bs; j++) {
1497       for (k=a->i[i]; k<a->i[i+1]; k++) {
1498         for (l=0; l<bs; l++) {
1499           aa[count++] = a->a[bs2*k + l*bs + j];
1500         }
1501       }
1502     }
1503   }
1504   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1505   ierr = PetscFree(aa);CHKERRQ(ierr);
1506 
1507   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1508   if (file) {
1509     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1516 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1517 {
1518   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1519   PetscErrorCode    ierr;
1520   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1521   PetscViewerFormat format;
1522 
1523   PetscFunctionBegin;
1524   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1525   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1526     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1527   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1528     Mat aij;
1529     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1530     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1531     ierr = MatDestroy(aij);CHKERRQ(ierr);
1532   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1533      PetscFunctionReturn(0);
1534   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1535     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1536     for (i=0; i<a->mbs; i++) {
1537       for (j=0; j<bs; j++) {
1538         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1539         for (k=a->i[i]; k<a->i[i+1]; k++) {
1540           for (l=0; l<bs; l++) {
1541 #if defined(PETSC_USE_COMPLEX)
1542             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1543               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1544                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1547                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1548             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1550             }
1551 #else
1552             if (a->a[bs2*k + l*bs + j] != 0.0) {
1553               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1554             }
1555 #endif
1556           }
1557         }
1558         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1559       }
1560     }
1561     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1562   } else {
1563     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1564     for (i=0; i<a->mbs; i++) {
1565       for (j=0; j<bs; j++) {
1566         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1567         for (k=a->i[i]; k<a->i[i+1]; k++) {
1568           for (l=0; l<bs; l++) {
1569 #if defined(PETSC_USE_COMPLEX)
1570             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1571               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1572                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1573             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1574               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1575                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1576             } else {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1578             }
1579 #else
1580             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1581 #endif
1582           }
1583         }
1584         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1585       }
1586     }
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1588   }
1589   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1595 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1596 {
1597   Mat            A = (Mat) Aa;
1598   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1599   PetscErrorCode ierr;
1600   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1601   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1602   MatScalar      *aa;
1603   PetscViewer    viewer;
1604 
1605   PetscFunctionBegin;
1606 
1607   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1608   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1609 
1610   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1611 
1612   /* loop over matrix elements drawing boxes */
1613   color = PETSC_DRAW_BLUE;
1614   for (i=0,row=0; i<mbs; i++,row+=bs) {
1615     for (j=a->i[i]; j<a->i[i+1]; j++) {
1616       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1617       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1618       aa = a->a + j*bs2;
1619       for (k=0; k<bs; k++) {
1620         for (l=0; l<bs; l++) {
1621           if (PetscRealPart(*aa++) >=  0.) continue;
1622           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1623         }
1624       }
1625     }
1626   }
1627   color = PETSC_DRAW_CYAN;
1628   for (i=0,row=0; i<mbs; i++,row+=bs) {
1629     for (j=a->i[i]; j<a->i[i+1]; j++) {
1630       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1631       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1632       aa = a->a + j*bs2;
1633       for (k=0; k<bs; k++) {
1634         for (l=0; l<bs; l++) {
1635           if (PetscRealPart(*aa++) != 0.) continue;
1636           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1637         }
1638       }
1639     }
1640   }
1641 
1642   color = PETSC_DRAW_RED;
1643   for (i=0,row=0; i<mbs; i++,row+=bs) {
1644     for (j=a->i[i]; j<a->i[i+1]; j++) {
1645       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1646       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1647       aa = a->a + j*bs2;
1648       for (k=0; k<bs; k++) {
1649         for (l=0; l<bs; l++) {
1650           if (PetscRealPart(*aa++) <= 0.) continue;
1651           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1652         }
1653       }
1654     }
1655   }
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1661 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1662 {
1663   PetscErrorCode ierr;
1664   PetscReal      xl,yl,xr,yr,w,h;
1665   PetscDraw      draw;
1666   PetscTruth     isnull;
1667 
1668   PetscFunctionBegin;
1669 
1670   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1671   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1672 
1673   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1674   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1675   xr += w;    yr += h;  xl = -w;     yl = -h;
1676   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1677   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1678   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatView_SeqBAIJ"
1684 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1685 {
1686   PetscErrorCode ierr;
1687   PetscTruth     iascii,isbinary,isdraw;
1688 
1689   PetscFunctionBegin;
1690   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1691   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1692   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1693   if (iascii){
1694     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1695   } else if (isbinary) {
1696     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1697   } else if (isdraw) {
1698     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1699   } else {
1700     Mat B;
1701     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1702     ierr = MatView(B,viewer);CHKERRQ(ierr);
1703     ierr = MatDestroy(B);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1711 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1712 {
1713   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1714   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1715   PetscInt    *ai = a->i,*ailen = a->ilen;
1716   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1717   MatScalar   *ap,*aa = a->a;
1718 
1719   PetscFunctionBegin;
1720   for (k=0; k<m; k++) { /* loop over rows */
1721     row  = im[k]; brow = row/bs;
1722     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1723     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1724     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1725     nrow = ailen[brow];
1726     for (l=0; l<n; l++) { /* loop over columns */
1727       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1728       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1729       col  = in[l] ;
1730       bcol = col/bs;
1731       cidx = col%bs;
1732       ridx = row%bs;
1733       high = nrow;
1734       low  = 0; /* assume unsorted */
1735       while (high-low > 5) {
1736         t = (low+high)/2;
1737         if (rp[t] > bcol) high = t;
1738         else             low  = t;
1739       }
1740       for (i=low; i<high; i++) {
1741         if (rp[i] > bcol) break;
1742         if (rp[i] == bcol) {
1743           *v++ = ap[bs2*i+bs*cidx+ridx];
1744           goto finished;
1745         }
1746       }
1747       *v++ = 0.0;
1748       finished:;
1749     }
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #if defined(PETSC_USE_MAT_SINGLE)
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1757 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1758 {
1759   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1760   PetscErrorCode ierr;
1761   PetscInt       i,N = m*n*b->bs2;
1762   MatScalar      *vsingle;
1763 
1764   PetscFunctionBegin;
1765   if (N > b->setvalueslen) {
1766     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1767     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1768     b->setvalueslen  = N;
1769   }
1770   vsingle = b->setvaluescopy;
1771   for (i=0; i<N; i++) {
1772     vsingle[i] = v[i];
1773   }
1774   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 #endif
1778 
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1782 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1783 {
1784   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1785   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1786   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1787   PetscErrorCode  ierr;
1788   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1789   PetscTruth      roworiented=a->roworiented;
1790   const MatScalar *value = v;
1791   MatScalar       *ap,*aa = a->a,*bap;
1792 
1793   PetscFunctionBegin;
1794   if (roworiented) {
1795     stepval = (n-1)*bs;
1796   } else {
1797     stepval = (m-1)*bs;
1798   }
1799   for (k=0; k<m; k++) { /* loop over added rows */
1800     row  = im[k];
1801     if (row < 0) continue;
1802 #if defined(PETSC_USE_DEBUG)
1803     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1804 #endif
1805     rp   = aj + ai[row];
1806     ap   = aa + bs2*ai[row];
1807     rmax = imax[row];
1808     nrow = ailen[row];
1809     low  = 0;
1810     high = nrow;
1811     for (l=0; l<n; l++) { /* loop over added columns */
1812       if (in[l] < 0) continue;
1813 #if defined(PETSC_USE_DEBUG)
1814       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1815 #endif
1816       col = in[l];
1817       if (roworiented) {
1818         value = v + k*(stepval+bs)*bs + l*bs;
1819       } else {
1820         value = v + l*(stepval+bs)*bs + k*bs;
1821       }
1822       if (col <= lastcol) low = 0; else high = nrow;
1823       lastcol = col;
1824       while (high-low > 7) {
1825         t = (low+high)/2;
1826         if (rp[t] > col) high = t;
1827         else             low  = t;
1828       }
1829       for (i=low; i<high; i++) {
1830         if (rp[i] > col) break;
1831         if (rp[i] == col) {
1832           bap  = ap +  bs2*i;
1833           if (roworiented) {
1834             if (is == ADD_VALUES) {
1835               for (ii=0; ii<bs; ii++,value+=stepval) {
1836                 for (jj=ii; jj<bs2; jj+=bs) {
1837                   bap[jj] += *value++;
1838                 }
1839               }
1840             } else {
1841               for (ii=0; ii<bs; ii++,value+=stepval) {
1842                 for (jj=ii; jj<bs2; jj+=bs) {
1843                   bap[jj] = *value++;
1844                 }
1845               }
1846             }
1847           } else {
1848             if (is == ADD_VALUES) {
1849               for (ii=0; ii<bs; ii++,value+=stepval) {
1850                 for (jj=0; jj<bs; jj++) {
1851                   *bap++ += *value++;
1852                 }
1853               }
1854             } else {
1855               for (ii=0; ii<bs; ii++,value+=stepval) {
1856                 for (jj=0; jj<bs; jj++) {
1857                   *bap++  = *value++;
1858                 }
1859               }
1860             }
1861           }
1862           goto noinsert2;
1863         }
1864       }
1865       if (nonew == 1) goto noinsert2;
1866       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1867       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868       N = nrow++ - 1; high++;
1869       /* shift up all the later entries in this row */
1870       for (ii=N; ii>=i; ii--) {
1871         rp[ii+1] = rp[ii];
1872         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1873       }
1874       if (N >= i) {
1875         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1876       }
1877       rp[i] = col;
1878       bap   = ap +  bs2*i;
1879       if (roworiented) {
1880         for (ii=0; ii<bs; ii++,value+=stepval) {
1881           for (jj=ii; jj<bs2; jj+=bs) {
1882             bap[jj] = *value++;
1883           }
1884         }
1885       } else {
1886         for (ii=0; ii<bs; ii++,value+=stepval) {
1887           for (jj=0; jj<bs; jj++) {
1888             *bap++  = *value++;
1889           }
1890         }
1891       }
1892       noinsert2:;
1893       low = i;
1894     }
1895     ailen[row] = nrow;
1896   }
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1902 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1903 {
1904   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1905   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1906   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1907   PetscErrorCode ierr;
1908   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1909   MatScalar      *aa = a->a,*ap;
1910   PetscReal      ratio=0.6;
1911 
1912   PetscFunctionBegin;
1913   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1914 
1915   if (m) rmax = ailen[0];
1916   for (i=1; i<mbs; i++) {
1917     /* move each row back by the amount of empty slots (fshift) before it*/
1918     fshift += imax[i-1] - ailen[i-1];
1919     rmax   = PetscMax(rmax,ailen[i]);
1920     if (fshift) {
1921       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1922       N = ailen[i];
1923       for (j=0; j<N; j++) {
1924         ip[j-fshift] = ip[j];
1925         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1926       }
1927     }
1928     ai[i] = ai[i-1] + ailen[i-1];
1929   }
1930   if (mbs) {
1931     fshift += imax[mbs-1] - ailen[mbs-1];
1932     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933   }
1934   /* reset ilen and imax for each row */
1935   for (i=0; i<mbs; i++) {
1936     ailen[i] = imax[i] = ai[i+1] - ai[i];
1937   }
1938   a->nz = ai[mbs];
1939 
1940   /* diagonals may have moved, so kill the diagonal pointers */
1941   a->idiagvalid = PETSC_FALSE;
1942   if (fshift && a->diag) {
1943     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1944     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1945     a->diag = 0;
1946   }
1947   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1948   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1950   a->reallocs          = 0;
1951   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1952 
1953   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1954   if (a->compressedrow.use){
1955     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1956   }
1957 
1958   A->same_nonzero = PETSC_TRUE;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 /*
1963    This function returns an array of flags which indicate the locations of contiguous
1964    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1965    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1966    Assume: sizes should be long enough to hold all the values.
1967 */
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1970 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1971 {
1972   PetscInt   i,j,k,row;
1973   PetscTruth flg;
1974 
1975   PetscFunctionBegin;
1976   for (i=0,j=0; i<n; j++) {
1977     row = idx[i];
1978     if (row%bs!=0) { /* Not the begining of a block */
1979       sizes[j] = 1;
1980       i++;
1981     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1982       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1983       i++;
1984     } else { /* Begining of the block, so check if the complete block exists */
1985       flg = PETSC_TRUE;
1986       for (k=1; k<bs; k++) {
1987         if (row+k != idx[i+k]) { /* break in the block */
1988           flg = PETSC_FALSE;
1989           break;
1990         }
1991       }
1992       if (flg) { /* No break in the bs */
1993         sizes[j] = bs;
1994         i+= bs;
1995       } else {
1996         sizes[j] = 1;
1997         i++;
1998       }
1999     }
2000   }
2001   *bs_max = j;
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2007 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2008 {
2009   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2010   PetscErrorCode ierr;
2011   PetscInt       i,j,k,count,*rows;
2012   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
2013   PetscScalar    zero = 0.0;
2014   MatScalar      *aa;
2015 
2016   PetscFunctionBegin;
2017   /* Make a copy of the IS and  sort it */
2018   /* allocate memory for rows,sizes */
2019   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2020   sizes = rows + is_n;
2021 
2022   /* copy IS values to rows, and sort them */
2023   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2024   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2025   if (baij->keepzeroedrows) {
2026     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2027     bs_max = is_n;
2028     A->same_nonzero = PETSC_TRUE;
2029   } else {
2030     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2031     A->same_nonzero = PETSC_FALSE;
2032   }
2033 
2034   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2035     row   = rows[j];
2036     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2037     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2038     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2039     if (sizes[i] == bs && !baij->keepzeroedrows) {
2040       if (diag != 0.0) {
2041         if (baij->ilen[row/bs] > 0) {
2042           baij->ilen[row/bs]       = 1;
2043           baij->j[baij->i[row/bs]] = row/bs;
2044           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2045         }
2046         /* Now insert all the diagonal values for this bs */
2047         for (k=0; k<bs; k++) {
2048           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2049         }
2050       } else { /* (diag == 0.0) */
2051         baij->ilen[row/bs] = 0;
2052       } /* end (diag == 0.0) */
2053     } else { /* (sizes[i] != bs) */
2054 #if defined (PETSC_USE_DEBUG)
2055       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2056 #endif
2057       for (k=0; k<count; k++) {
2058         aa[0] =  zero;
2059         aa    += bs;
2060       }
2061       if (diag != 0.0) {
2062         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2063       }
2064     }
2065   }
2066 
2067   ierr = PetscFree(rows);CHKERRQ(ierr);
2068   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2074 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2075 {
2076   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2077   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2078   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2079   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2080   PetscErrorCode ierr;
2081   PetscInt       ridx,cidx,bs2=a->bs2;
2082   PetscTruth     roworiented=a->roworiented;
2083   MatScalar      *ap,value,*aa=a->a,*bap;
2084 
2085   PetscFunctionBegin;
2086   for (k=0; k<m; k++) { /* loop over added rows */
2087     row  = im[k];
2088     brow = row/bs;
2089     if (row < 0) continue;
2090 #if defined(PETSC_USE_DEBUG)
2091     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2092 #endif
2093     rp   = aj + ai[brow];
2094     ap   = aa + bs2*ai[brow];
2095     rmax = imax[brow];
2096     nrow = ailen[brow];
2097     low  = 0;
2098     high = nrow;
2099     for (l=0; l<n; l++) { /* loop over added columns */
2100       if (in[l] < 0) continue;
2101 #if defined(PETSC_USE_DEBUG)
2102       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2103 #endif
2104       col = in[l]; bcol = col/bs;
2105       ridx = row % bs; cidx = col % bs;
2106       if (roworiented) {
2107         value = v[l + k*n];
2108       } else {
2109         value = v[k + l*m];
2110       }
2111       if (col <= lastcol) low = 0; else high = nrow;
2112       lastcol = col;
2113       while (high-low > 7) {
2114         t = (low+high)/2;
2115         if (rp[t] > bcol) high = t;
2116         else              low  = t;
2117       }
2118       for (i=low; i<high; i++) {
2119         if (rp[i] > bcol) break;
2120         if (rp[i] == bcol) {
2121           bap  = ap +  bs2*i + bs*cidx + ridx;
2122           if (is == ADD_VALUES) *bap += value;
2123           else                  *bap  = value;
2124           goto noinsert1;
2125         }
2126       }
2127       if (nonew == 1) goto noinsert1;
2128       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2129       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2130       N = nrow++ - 1; high++;
2131       /* shift up all the later entries in this row */
2132       for (ii=N; ii>=i; ii--) {
2133         rp[ii+1] = rp[ii];
2134         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2135       }
2136       if (N>=i) {
2137         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2138       }
2139       rp[i]                      = bcol;
2140       ap[bs2*i + bs*cidx + ridx] = value;
2141       a->nz++;
2142       noinsert1:;
2143       low = i;
2144     }
2145     ailen[brow] = nrow;
2146   }
2147   A->same_nonzero = PETSC_FALSE;
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2154 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2155 {
2156   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2157   Mat            outA;
2158   PetscErrorCode ierr;
2159   PetscTruth     row_identity,col_identity;
2160 
2161   PetscFunctionBegin;
2162   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2163   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2164   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2165   if (!row_identity || !col_identity) {
2166     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2167   }
2168 
2169   outA          = inA;
2170   inA->factor   = FACTOR_LU;
2171 
2172   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2173 
2174   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2175   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2176   a->row = row;
2177   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2178   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2179   a->col = col;
2180 
2181   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2182   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2183   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2184 
2185   /*
2186       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2187       for ILU(0) factorization with natural ordering
2188   */
2189   if (inA->rmap.bs < 8) {
2190     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
2191   } else {
2192     if (!a->solve_work) {
2193       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2194       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2195     }
2196   }
2197 
2198   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2199 
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 EXTERN_C_BEGIN
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2206 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2207 {
2208   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2209   PetscInt    i,nz,nbs;
2210 
2211   PetscFunctionBegin;
2212   nz  = baij->maxnz/baij->bs2;
2213   nbs = baij->nbs;
2214   for (i=0; i<nz; i++) {
2215     baij->j[i] = indices[i];
2216   }
2217   baij->nz = nz;
2218   for (i=0; i<nbs; i++) {
2219     baij->ilen[i] = baij->imax[i];
2220   }
2221 
2222   PetscFunctionReturn(0);
2223 }
2224 EXTERN_C_END
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2228 /*@
2229     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2230        in the matrix.
2231 
2232   Input Parameters:
2233 +  mat - the SeqBAIJ matrix
2234 -  indices - the column indices
2235 
2236   Level: advanced
2237 
2238   Notes:
2239     This can be called if you have precomputed the nonzero structure of the
2240   matrix and want to provide it to the matrix object to improve the performance
2241   of the MatSetValues() operation.
2242 
2243     You MUST have set the correct numbers of nonzeros per row in the call to
2244   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2245 
2246     MUST be called before any calls to MatSetValues();
2247 
2248 @*/
2249 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2250 {
2251   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2252 
2253   PetscFunctionBegin;
2254   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2255   PetscValidPointer(indices,2);
2256   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2257   if (f) {
2258     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2259   } else {
2260     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2261   }
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2267 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2268 {
2269   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2270   PetscErrorCode ierr;
2271   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2272   PetscReal      atmp;
2273   PetscScalar    *x,zero = 0.0;
2274   MatScalar      *aa;
2275   PetscInt       ncols,brow,krow,kcol;
2276 
2277   PetscFunctionBegin;
2278   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2279   bs   = A->rmap.bs;
2280   aa   = a->a;
2281   ai   = a->i;
2282   aj   = a->j;
2283   mbs  = a->mbs;
2284 
2285   ierr = VecSet(v,zero);CHKERRQ(ierr);
2286   if (idx) {
2287     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
2288   }
2289   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2290   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2291   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2292   for (i=0; i<mbs; i++) {
2293     ncols = ai[1] - ai[0]; ai++;
2294     brow  = bs*i;
2295     for (j=0; j<ncols; j++){
2296       for (kcol=0; kcol<bs; kcol++){
2297         for (krow=0; krow<bs; krow++){
2298           atmp = PetscAbsScalar(*aa);aa++;
2299           row = brow + krow;    /* row index */
2300           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2301           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2302         }
2303       }
2304       aj++;
2305     }
2306   }
2307   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 #undef __FUNCT__
2312 #define __FUNCT__ "MatCopy_SeqBAIJ"
2313 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2314 {
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   /* If the two matrices have the same copy implementation, use fast copy. */
2319   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2320     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2321     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2322 
2323     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2324       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2325     }
2326     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
2327   } else {
2328     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2329   }
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2335 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2336 {
2337   PetscErrorCode ierr;
2338 
2339   PetscFunctionBegin;
2340   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2346 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2347 {
2348   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2349   PetscFunctionBegin;
2350   *array = a->a;
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2356 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2357 {
2358   PetscFunctionBegin;
2359   PetscFunctionReturn(0);
2360 }
2361 
2362 #include "petscblaslapack.h"
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2365 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2366 {
2367   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2368   PetscErrorCode ierr;
2369   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2370   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2371 
2372   PetscFunctionBegin;
2373   if (str == SAME_NONZERO_PATTERN) {
2374     PetscScalar alpha = a;
2375     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2376   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2377     if (y->xtoy && y->XtoY != X) {
2378       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2379       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2380     }
2381     if (!y->xtoy) { /* get xtoy */
2382       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2383       y->XtoY = X;
2384     }
2385     bs2 = bs*bs;
2386     for (i=0; i<x->nz; i++) {
2387       j = 0;
2388       while (j < bs2){
2389         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2390         j++;
2391       }
2392     }
2393     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2394   } else {
2395     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2396   }
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2402 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2403 {
2404   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2405   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2406   PetscScalar    *aa = a->a;
2407 
2408   PetscFunctionBegin;
2409   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2415 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2416 {
2417   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2418   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2419   PetscScalar    *aa = a->a;
2420 
2421   PetscFunctionBegin;
2422   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 
2427 /* -------------------------------------------------------------------*/
2428 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2429        MatGetRow_SeqBAIJ,
2430        MatRestoreRow_SeqBAIJ,
2431        MatMult_SeqBAIJ_N,
2432 /* 4*/ MatMultAdd_SeqBAIJ_N,
2433        MatMultTranspose_SeqBAIJ,
2434        MatMultTransposeAdd_SeqBAIJ,
2435        MatSolve_SeqBAIJ_N,
2436        0,
2437        0,
2438 /*10*/ 0,
2439        MatLUFactor_SeqBAIJ,
2440        0,
2441        0,
2442        MatTranspose_SeqBAIJ,
2443 /*15*/ MatGetInfo_SeqBAIJ,
2444        MatEqual_SeqBAIJ,
2445        MatGetDiagonal_SeqBAIJ,
2446        MatDiagonalScale_SeqBAIJ,
2447        MatNorm_SeqBAIJ,
2448 /*20*/ 0,
2449        MatAssemblyEnd_SeqBAIJ,
2450        0,
2451        MatSetOption_SeqBAIJ,
2452        MatZeroEntries_SeqBAIJ,
2453 /*25*/ MatZeroRows_SeqBAIJ,
2454        MatLUFactorSymbolic_SeqBAIJ,
2455        MatLUFactorNumeric_SeqBAIJ_N,
2456        MatCholeskyFactorSymbolic_SeqBAIJ,
2457        MatCholeskyFactorNumeric_SeqBAIJ_N,
2458 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2459        MatILUFactorSymbolic_SeqBAIJ,
2460        MatICCFactorSymbolic_SeqBAIJ,
2461        MatGetArray_SeqBAIJ,
2462        MatRestoreArray_SeqBAIJ,
2463 /*35*/ MatDuplicate_SeqBAIJ,
2464        0,
2465        0,
2466        MatILUFactor_SeqBAIJ,
2467        0,
2468 /*40*/ MatAXPY_SeqBAIJ,
2469        MatGetSubMatrices_SeqBAIJ,
2470        MatIncreaseOverlap_SeqBAIJ,
2471        MatGetValues_SeqBAIJ,
2472        MatCopy_SeqBAIJ,
2473 /*45*/ 0,
2474        MatScale_SeqBAIJ,
2475        0,
2476        0,
2477        0,
2478 /*50*/ 0,
2479        MatGetRowIJ_SeqBAIJ,
2480        MatRestoreRowIJ_SeqBAIJ,
2481        0,
2482        0,
2483 /*55*/ 0,
2484        0,
2485        0,
2486        0,
2487        MatSetValuesBlocked_SeqBAIJ,
2488 /*60*/ MatGetSubMatrix_SeqBAIJ,
2489        MatDestroy_SeqBAIJ,
2490        MatView_SeqBAIJ,
2491        0,
2492        0,
2493 /*65*/ 0,
2494        0,
2495        0,
2496        0,
2497        0,
2498 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2499        MatConvert_Basic,
2500        0,
2501        0,
2502        0,
2503 /*75*/ 0,
2504        0,
2505        0,
2506        0,
2507        0,
2508 /*80*/ 0,
2509        0,
2510        0,
2511        0,
2512        MatLoad_SeqBAIJ,
2513 /*85*/ 0,
2514        0,
2515        0,
2516        0,
2517        0,
2518 /*90*/ 0,
2519        0,
2520        0,
2521        0,
2522        0,
2523 /*95*/ 0,
2524        0,
2525        0,
2526        0,
2527        0,
2528 /*100*/0,
2529        0,
2530        0,
2531        0,
2532        0,
2533 /*105*/0,
2534        MatRealPart_SeqBAIJ,
2535        MatImaginaryPart_SeqBAIJ,
2536        0,
2537        0,
2538 /*110*/0,
2539        0,
2540        0,
2541        0,
2542        MatMissingDiagonal_SeqBAIJ
2543 /*115*/
2544 };
2545 
2546 EXTERN_C_BEGIN
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2549 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2550 {
2551   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2552   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2553   PetscErrorCode ierr;
2554 
2555   PetscFunctionBegin;
2556   if (aij->nonew != 1) {
2557     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2558   }
2559 
2560   /* allocate space for values if not already there */
2561   if (!aij->saved_values) {
2562     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2563   }
2564 
2565   /* copy values over */
2566   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 EXTERN_C_END
2570 
2571 EXTERN_C_BEGIN
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2574 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2575 {
2576   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2577   PetscErrorCode ierr;
2578   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2579 
2580   PetscFunctionBegin;
2581   if (aij->nonew != 1) {
2582     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2583   }
2584   if (!aij->saved_values) {
2585     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2586   }
2587 
2588   /* copy values over */
2589   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 EXTERN_C_END
2593 
2594 EXTERN_C_BEGIN
2595 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2596 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2597 EXTERN_C_END
2598 
2599 EXTERN_C_BEGIN
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2602 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2603 {
2604   Mat_SeqBAIJ    *b;
2605   PetscErrorCode ierr;
2606   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2607   PetscTruth     flg,skipallocation = PETSC_FALSE;
2608 
2609   PetscFunctionBegin;
2610 
2611   if (nz == MAT_SKIP_ALLOCATION) {
2612     skipallocation = PETSC_TRUE;
2613     nz             = 0;
2614   }
2615 
2616   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2617     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2618   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2619 
2620   if (nnz && newbs != bs) {
2621     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2622   }
2623   bs   = newbs;
2624 
2625   B->rmap.bs = B->cmap.bs = bs;
2626   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2627   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2628 
2629   B->preallocated = PETSC_TRUE;
2630 
2631   mbs  = B->rmap.n/bs;
2632   nbs  = B->cmap.n/bs;
2633   bs2  = bs*bs;
2634 
2635   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2636     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2637   }
2638 
2639   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2640   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2641   if (nnz) {
2642     for (i=0; i<mbs; i++) {
2643       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2644       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2645     }
2646   }
2647 
2648   b       = (Mat_SeqBAIJ*)B->data;
2649   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2650     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2651   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2652 
2653   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2654   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2655   if (!flg) {
2656     switch (bs) {
2657     case 1:
2658       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2659       B->ops->mult            = MatMult_SeqBAIJ_1;
2660       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2661       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2662       break;
2663     case 2:
2664       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2665       B->ops->mult            = MatMult_SeqBAIJ_2;
2666       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2667       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2668       break;
2669     case 3:
2670       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2671       B->ops->mult            = MatMult_SeqBAIJ_3;
2672       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2673       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2674       break;
2675     case 4:
2676       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2677       B->ops->mult            = MatMult_SeqBAIJ_4;
2678       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2679       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2680       break;
2681     case 5:
2682       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2683       B->ops->mult            = MatMult_SeqBAIJ_5;
2684       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2685       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2686       break;
2687     case 6:
2688       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2689       B->ops->mult            = MatMult_SeqBAIJ_6;
2690       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2691       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2692       break;
2693     case 7:
2694       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2695       B->ops->mult            = MatMult_SeqBAIJ_7;
2696       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2697       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2698       break;
2699     default:
2700       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2701       B->ops->mult            = MatMult_SeqBAIJ_N;
2702       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2703       break;
2704     }
2705   }
2706   B->rmap.bs      = bs;
2707   b->mbs     = mbs;
2708   b->nbs     = nbs;
2709   if (!skipallocation) {
2710     if (!b->imax) {
2711       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2712     }
2713     /* b->ilen will count nonzeros in each block row so far. */
2714     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2715     if (!nnz) {
2716       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2717       else if (nz <= 0)        nz = 1;
2718       for (i=0; i<mbs; i++) b->imax[i] = nz;
2719       nz = nz*mbs;
2720     } else {
2721       nz = 0;
2722       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2723     }
2724 
2725     /* allocate the matrix space */
2726     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2727     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2728     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2729     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2730     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2731     b->singlemalloc = PETSC_TRUE;
2732     b->i[0] = 0;
2733     for (i=1; i<mbs+1; i++) {
2734       b->i[i] = b->i[i-1] + b->imax[i-1];
2735     }
2736     b->free_a     = PETSC_TRUE;
2737     b->free_ij    = PETSC_TRUE;
2738   } else {
2739     b->free_a     = PETSC_FALSE;
2740     b->free_ij    = PETSC_FALSE;
2741   }
2742 
2743   B->rmap.bs          = bs;
2744   b->bs2              = bs2;
2745   b->mbs              = mbs;
2746   b->nz               = 0;
2747   b->maxnz            = nz*bs2;
2748   B->info.nz_unneeded = (PetscReal)b->maxnz;
2749   PetscFunctionReturn(0);
2750 }
2751 EXTERN_C_END
2752 
2753 /*MC
2754    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2755    block sparse compressed row format.
2756 
2757    Options Database Keys:
2758 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2759 
2760   Level: beginner
2761 
2762 .seealso: MatCreateSeqBAIJ()
2763 M*/
2764 
2765 EXTERN_C_BEGIN
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatCreate_SeqBAIJ"
2768 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2769 {
2770   PetscErrorCode ierr;
2771   PetscMPIInt    size;
2772   Mat_SeqBAIJ    *b;
2773 
2774   PetscFunctionBegin;
2775   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2776   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2777 
2778   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2779   B->data = (void*)b;
2780   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2781   B->factor           = 0;
2782   B->mapping          = 0;
2783   b->row              = 0;
2784   b->col              = 0;
2785   b->icol             = 0;
2786   b->reallocs         = 0;
2787   b->saved_values     = 0;
2788 #if defined(PETSC_USE_MAT_SINGLE)
2789   b->setvalueslen     = 0;
2790   b->setvaluescopy    = PETSC_NULL;
2791 #endif
2792 
2793   b->roworiented      = PETSC_TRUE;
2794   b->nonew            = 0;
2795   b->diag             = 0;
2796   b->solve_work       = 0;
2797   b->mult_work        = 0;
2798   B->spptr            = 0;
2799   B->info.nz_unneeded = (PetscReal)b->maxnz;
2800   b->keepzeroedrows   = PETSC_FALSE;
2801   b->xtoy              = 0;
2802   b->XtoY              = 0;
2803   b->compressedrow.use     = PETSC_FALSE;
2804   b->compressedrow.nrows   = 0;
2805   b->compressedrow.i       = PETSC_NULL;
2806   b->compressedrow.rindex  = PETSC_NULL;
2807   b->compressedrow.checked = PETSC_FALSE;
2808   B->same_nonzero          = PETSC_FALSE;
2809 
2810   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2811                                      "MatInvertBlockDiagonal_SeqBAIJ",
2812                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2813   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2814                                      "MatStoreValues_SeqBAIJ",
2815                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2816   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2817                                      "MatRetrieveValues_SeqBAIJ",
2818                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2820                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2821                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2823                                      "MatConvert_SeqBAIJ_SeqAIJ",
2824                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2826                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2827                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2829                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2830                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2831   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 EXTERN_C_END
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2838 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2839 {
2840   Mat            C;
2841   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2842   PetscErrorCode ierr;
2843   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2844 
2845   PetscFunctionBegin;
2846   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2847 
2848   *B = 0;
2849   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2850   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2851   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2852   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2853   c    = (Mat_SeqBAIJ*)C->data;
2854 
2855   C->rmap.N   = A->rmap.N;
2856   C->cmap.N   = A->cmap.N;
2857   C->rmap.bs  = A->rmap.bs;
2858   c->bs2 = a->bs2;
2859   c->mbs = a->mbs;
2860   c->nbs = a->nbs;
2861 
2862   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2863   for (i=0; i<mbs; i++) {
2864     c->imax[i] = a->imax[i];
2865     c->ilen[i] = a->ilen[i];
2866   }
2867 
2868   /* allocate the matrix space */
2869   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2870   c->singlemalloc = PETSC_TRUE;
2871   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2872   if (mbs > 0) {
2873     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2874     if (cpvalues == MAT_COPY_VALUES) {
2875       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2876     } else {
2877       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2878     }
2879   }
2880   c->roworiented = a->roworiented;
2881   c->nonew       = a->nonew;
2882 
2883   if (a->diag) {
2884     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2885     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2886     for (i=0; i<mbs; i++) {
2887       c->diag[i] = a->diag[i];
2888     }
2889   } else c->diag        = 0;
2890   c->nz                 = a->nz;
2891   c->maxnz              = a->maxnz;
2892   c->solve_work         = 0;
2893   c->mult_work          = 0;
2894   c->free_a             = PETSC_TRUE;
2895   c->free_ij            = PETSC_TRUE;
2896   C->preallocated       = PETSC_TRUE;
2897   C->assembled          = PETSC_TRUE;
2898 
2899   c->compressedrow.use     = a->compressedrow.use;
2900   c->compressedrow.nrows   = a->compressedrow.nrows;
2901   c->compressedrow.checked = a->compressedrow.checked;
2902   if ( a->compressedrow.checked && a->compressedrow.use){
2903     i = a->compressedrow.nrows;
2904     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2905     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2906     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2907     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2908   } else {
2909     c->compressedrow.use    = PETSC_FALSE;
2910     c->compressedrow.i      = PETSC_NULL;
2911     c->compressedrow.rindex = PETSC_NULL;
2912   }
2913   C->same_nonzero = A->same_nonzero;
2914   *B = C;
2915   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2916   PetscFunctionReturn(0);
2917 }
2918 
2919 #undef __FUNCT__
2920 #define __FUNCT__ "MatLoad_SeqBAIJ"
2921 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2922 {
2923   Mat_SeqBAIJ    *a;
2924   Mat            B;
2925   PetscErrorCode ierr;
2926   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2927   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2928   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2929   PetscInt       *masked,nmask,tmp,bs2,ishift;
2930   PetscMPIInt    size;
2931   int            fd;
2932   PetscScalar    *aa;
2933   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2934 
2935   PetscFunctionBegin;
2936   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2937     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2938   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2939   bs2  = bs*bs;
2940 
2941   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2942   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2943   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2944   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2945   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2946   M = header[1]; N = header[2]; nz = header[3];
2947 
2948   if (header[3] < 0) {
2949     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2950   }
2951 
2952   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2953 
2954   /*
2955      This code adds extra rows to make sure the number of rows is
2956     divisible by the blocksize
2957   */
2958   mbs        = M/bs;
2959   extra_rows = bs - M + bs*(mbs);
2960   if (extra_rows == bs) extra_rows = 0;
2961   else                  mbs++;
2962   if (extra_rows) {
2963     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2964   }
2965 
2966   /* read in row lengths */
2967   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2968   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2969   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2970 
2971   /* read in column indices */
2972   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2973   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2974   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2975 
2976   /* loop over row lengths determining block row lengths */
2977   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2978   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2979   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2980   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2981   masked   = mask + mbs;
2982   rowcount = 0; nzcount = 0;
2983   for (i=0; i<mbs; i++) {
2984     nmask = 0;
2985     for (j=0; j<bs; j++) {
2986       kmax = rowlengths[rowcount];
2987       for (k=0; k<kmax; k++) {
2988         tmp = jj[nzcount++]/bs;
2989         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2990       }
2991       rowcount++;
2992     }
2993     browlengths[i] += nmask;
2994     /* zero out the mask elements we set */
2995     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2996   }
2997 
2998   /* create our matrix */
2999   ierr = MatCreate(comm,&B);
3000   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3001   ierr = MatSetType(B,type);CHKERRQ(ierr);
3002   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3003   a = (Mat_SeqBAIJ*)B->data;
3004 
3005   /* set matrix "i" values */
3006   a->i[0] = 0;
3007   for (i=1; i<= mbs; i++) {
3008     a->i[i]      = a->i[i-1] + browlengths[i-1];
3009     a->ilen[i-1] = browlengths[i-1];
3010   }
3011   a->nz         = 0;
3012   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3013 
3014   /* read in nonzero values */
3015   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3016   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3017   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3018 
3019   /* set "a" and "j" values into matrix */
3020   nzcount = 0; jcount = 0;
3021   for (i=0; i<mbs; i++) {
3022     nzcountb = nzcount;
3023     nmask    = 0;
3024     for (j=0; j<bs; j++) {
3025       kmax = rowlengths[i*bs+j];
3026       for (k=0; k<kmax; k++) {
3027         tmp = jj[nzcount++]/bs;
3028 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3029       }
3030     }
3031     /* sort the masked values */
3032     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3033 
3034     /* set "j" values into matrix */
3035     maskcount = 1;
3036     for (j=0; j<nmask; j++) {
3037       a->j[jcount++]  = masked[j];
3038       mask[masked[j]] = maskcount++;
3039     }
3040     /* set "a" values into matrix */
3041     ishift = bs2*a->i[i];
3042     for (j=0; j<bs; j++) {
3043       kmax = rowlengths[i*bs+j];
3044       for (k=0; k<kmax; k++) {
3045         tmp       = jj[nzcountb]/bs ;
3046         block     = mask[tmp] - 1;
3047         point     = jj[nzcountb] - bs*tmp;
3048         idx       = ishift + bs2*block + j + bs*point;
3049         a->a[idx] = (MatScalar)aa[nzcountb++];
3050       }
3051     }
3052     /* zero out the mask elements we set */
3053     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3054   }
3055   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3056 
3057   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3058   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3059   ierr = PetscFree(aa);CHKERRQ(ierr);
3060   ierr = PetscFree(jj);CHKERRQ(ierr);
3061   ierr = PetscFree(mask);CHKERRQ(ierr);
3062 
3063   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3064   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3065   ierr = MatView_Private(B);CHKERRQ(ierr);
3066 
3067   *A = B;
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatCreateSeqBAIJ"
3073 /*@C
3074    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3075    compressed row) format.  For good matrix assembly performance the
3076    user should preallocate the matrix storage by setting the parameter nz
3077    (or the array nnz).  By setting these parameters accurately, performance
3078    during matrix assembly can be increased by more than a factor of 50.
3079 
3080    Collective on MPI_Comm
3081 
3082    Input Parameters:
3083 +  comm - MPI communicator, set to PETSC_COMM_SELF
3084 .  bs - size of block
3085 .  m - number of rows
3086 .  n - number of columns
3087 .  nz - number of nonzero blocks  per block row (same for all rows)
3088 -  nnz - array containing the number of nonzero blocks in the various block rows
3089          (possibly different for each block row) or PETSC_NULL
3090 
3091    Output Parameter:
3092 .  A - the matrix
3093 
3094    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3095    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3096    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3097    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3098 
3099    Options Database Keys:
3100 .   -mat_no_unroll - uses code that does not unroll the loops in the
3101                      block calculations (much slower)
3102 .    -mat_block_size - size of the blocks to use
3103 
3104    Level: intermediate
3105 
3106    Notes:
3107    The number of rows and columns must be divisible by blocksize.
3108 
3109    If the nnz parameter is given then the nz parameter is ignored
3110 
3111    A nonzero block is any block that as 1 or more nonzeros in it
3112 
3113    The block AIJ format is fully compatible with standard Fortran 77
3114    storage.  That is, the stored row and column indices can begin at
3115    either one (as in Fortran) or zero.  See the users' manual for details.
3116 
3117    Specify the preallocated storage with either nz or nnz (not both).
3118    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3119    allocation.  For additional details, see the users manual chapter on
3120    matrices.
3121 
3122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3123 @*/
3124 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3125 {
3126   PetscErrorCode ierr;
3127 
3128   PetscFunctionBegin;
3129   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3130   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3131   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3132   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3133   PetscFunctionReturn(0);
3134 }
3135 
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3138 /*@C
3139    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3140    per row in the matrix. For good matrix assembly performance the
3141    user should preallocate the matrix storage by setting the parameter nz
3142    (or the array nnz).  By setting these parameters accurately, performance
3143    during matrix assembly can be increased by more than a factor of 50.
3144 
3145    Collective on MPI_Comm
3146 
3147    Input Parameters:
3148 +  A - the matrix
3149 .  bs - size of block
3150 .  nz - number of block nonzeros per block row (same for all rows)
3151 -  nnz - array containing the number of block nonzeros in the various block rows
3152          (possibly different for each block row) or PETSC_NULL
3153 
3154    Options Database Keys:
3155 .   -mat_no_unroll - uses code that does not unroll the loops in the
3156                      block calculations (much slower)
3157 .    -mat_block_size - size of the blocks to use
3158 
3159    Level: intermediate
3160 
3161    Notes:
3162    If the nnz parameter is given then the nz parameter is ignored
3163 
3164    You can call MatGetInfo() to get information on how effective the preallocation was;
3165    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3166    You can also run with the option -info and look for messages with the string
3167    malloc in them to see if additional memory allocation was needed.
3168 
3169    The block AIJ format is fully compatible with standard Fortran 77
3170    storage.  That is, the stored row and column indices can begin at
3171    either one (as in Fortran) or zero.  See the users' manual for details.
3172 
3173    Specify the preallocated storage with either nz or nnz (not both).
3174    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3175    allocation.  For additional details, see the users manual chapter on
3176    matrices.
3177 
3178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3179 @*/
3180 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3181 {
3182   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3183 
3184   PetscFunctionBegin;
3185   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3186   if (f) {
3187     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3188   }
3189   PetscFunctionReturn(0);
3190 }
3191 
3192 #undef __FUNCT__
3193 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3194 /*@
3195      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3196               (upper triangular entries in CSR format) provided by the user.
3197 
3198      Collective on MPI_Comm
3199 
3200    Input Parameters:
3201 +  comm - must be an MPI communicator of size 1
3202 .  bs - size of block
3203 .  m - number of rows
3204 .  n - number of columns
3205 .  i - row indices
3206 .  j - column indices
3207 -  a - matrix values
3208 
3209    Output Parameter:
3210 .  mat - the matrix
3211 
3212    Level: intermediate
3213 
3214    Notes:
3215        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3216     once the matrix is destroyed
3217 
3218        You cannot set new nonzero locations into this matrix, that will generate an error.
3219 
3220        The i and j indices are 0 based
3221 
3222 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3223 
3224 @*/
3225 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3226 {
3227   PetscErrorCode ierr;
3228   PetscInt       ii;
3229   Mat_SeqBAIJ    *baij;
3230 
3231   PetscFunctionBegin;
3232   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3233   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3234 
3235   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3236   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3237   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3238   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3239   baij = (Mat_SeqBAIJ*)(*mat)->data;
3240   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3241 
3242   baij->i = i;
3243   baij->j = j;
3244   baij->a = a;
3245   baij->singlemalloc = PETSC_FALSE;
3246   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3247   baij->free_a       = PETSC_FALSE;
3248   baij->free_ij       = PETSC_FALSE;
3249 
3250   for (ii=0; ii<m; ii++) {
3251     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3252 #if defined(PETSC_USE_DEBUG)
3253     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3254 #endif
3255   }
3256 #if defined(PETSC_USE_DEBUG)
3257   for (ii=0; ii<baij->i[m]; ii++) {
3258     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3259     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3260   }
3261 #endif
3262 
3263   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267