xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 6145cd650024ce38668196a0ed2dfef51f77eb7a)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52         ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53         diag    += 4;
54         mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66         ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67         diag    += 9;
68         mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76         ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77         diag  += 16;
78         mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86         ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87         diag  += 25;
88         mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97         diag  += 36;
98         mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107         diag  += 49;
108         mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118         diag  += bs2;
119         mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253         v     = aa + 4*ai[i];
254         vi    = aj + ai[i];
255         nz    = diag[i] - ai[i];
256         s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258           idx  = 2*vi[j];
259           it   = 4*j;
260           x1   = x[idx]; x2 = x[1+idx];
261           s1  -= v[it]*x1 + v[it+2]*x2;
262           s2  -= v[it+1]*x1 + v[it+3]*x2;
263         }
264         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296         v     = aa + 4*(diag[i]+1);
297         vi    = aj + diag[i] + 1;
298         nz    = ai[i+1] - diag[i] - 1;
299         s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301           idx  = 2*vi[j];
302           it   = 4*j;
303           x1   = x[idx]; x2 = x[1+idx];
304           s1  -= v[it]*x1 + v[it+2]*x2;
305           s2  -= v[it+1]*x1 + v[it+3]*x2;
306         }
307         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470         v     = aa + 16*ai[i];
471         vi    = aj + ai[i];
472         nz    = diag[i] - ai[i];
473         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474         while (nz--) {
475           idx  = 4*(*vi++);
476           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481           v   += 16;
482         }
483         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521         v     = aa + 16*(diag[i]+1);
522         vi    = aj + diag[i] + 1;
523         nz    = ai[i+1] - diag[i] - 1;
524         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525         while (nz--) {
526           idx  = 4*(*vi++);
527           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532           v   += 16;
533         }
534         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589         v     = aa + 25*ai[i];
590         vi    = aj + ai[i];
591         nz    = diag[i] - ai[i];
592         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593         while (nz--) {
594           idx  = 5*(*vi++);
595           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601           v   += 25;
602         }
603         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644         v     = aa + 25*(diag[i]+1);
645         vi    = aj + diag[i] + 1;
646         nz    = ai[i+1] - diag[i] - 1;
647         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648         while (nz--) {
649           idx  = 5*(*vi++);
650           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656           v   += 25;
657         }
658         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999         /* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014           } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063         /* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078           } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol)  low = 0;
1132       else                high = nrow;
1133       lastcol = col;
1134       value = v + k*(stepval+4 + l)*4;
1135       while (high-low > 7) {
1136         t = (low+high)/2;
1137         if (rp[t] > col) high = t;
1138         else             low  = t;
1139       }
1140       for (i=low; i<high; i++) {
1141         if (rp[i] > col) break;
1142         if (rp[i] == col) {
1143           bap  = ap +  16*i;
1144           for (ii=0; ii<4; ii++,value+=stepval) {
1145             for (jj=ii; jj<16; jj+=4) {
1146               bap[jj] += *value++;
1147             }
1148           }
1149           goto noinsert2;
1150         }
1151       }
1152       N = nrow++ - 1;
1153       high++; /* added new column index thus must search to one higher than before */
1154       /* shift up all the later entries in this row */
1155       for (ii=N; ii>=i; ii--) {
1156         rp[ii+1] = rp[ii];
1157         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1158       }
1159       if (N >= i) {
1160         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1161       }
1162       rp[i] = col;
1163       bap   = ap +  16*i;
1164       for (ii=0; ii<4; ii++,value+=stepval) {
1165         for (jj=ii; jj<16; jj+=4) {
1166           bap[jj] = *value++;
1167         }
1168       }
1169       noinsert2:;
1170       low = i;
1171     }
1172     ailen[row] = nrow;
1173   }
1174   PetscFunctionReturnVoid();
1175 }
1176 EXTERN_C_END
1177 
1178 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1179 #define matsetvalues4_ MATSETVALUES4
1180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1181 #define matsetvalues4_ matsetvalues4
1182 #endif
1183 
1184 EXTERN_C_BEGIN
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatSetValues4_"
1187 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1188 {
1189   Mat         A = *AA;
1190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1191   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1192   PetscInt    *ai=a->i,*ailen=a->ilen;
1193   PetscInt    *aj=a->j,brow,bcol;
1194   PetscInt    ridx,cidx,lastcol = -1;
1195   MatScalar   *ap,value,*aa=a->a,*bap;
1196 
1197   PetscFunctionBegin;
1198   for (k=0; k<m; k++) { /* loop over added rows */
1199     row  = im[k]; brow = row/4;
1200     rp   = aj + ai[brow];
1201     ap   = aa + 16*ai[brow];
1202     nrow = ailen[brow];
1203     low  = 0;
1204     high = nrow;
1205     for (l=0; l<n; l++) { /* loop over added columns */
1206       col = in[l]; bcol = col/4;
1207       ridx = row % 4; cidx = col % 4;
1208       value = v[l + k*n];
1209       if (col <= lastcol)  low = 0;
1210       else                high = nrow;
1211       lastcol = col;
1212       while (high-low > 7) {
1213         t = (low+high)/2;
1214         if (rp[t] > bcol) high = t;
1215         else              low  = t;
1216       }
1217       for (i=low; i<high; i++) {
1218         if (rp[i] > bcol) break;
1219         if (rp[i] == bcol) {
1220           bap  = ap +  16*i + 4*cidx + ridx;
1221           *bap += value;
1222           goto noinsert1;
1223         }
1224       }
1225       N = nrow++ - 1;
1226       high++; /* added new column thus must search to one higher than before */
1227       /* shift up all the later entries in this row */
1228       for (ii=N; ii>=i; ii--) {
1229         rp[ii+1] = rp[ii];
1230         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1231       }
1232       if (N>=i) {
1233         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1234       }
1235       rp[i]                    = bcol;
1236       ap[16*i + 4*cidx + ridx] = value;
1237       noinsert1:;
1238       low = i;
1239     }
1240     ailen[brow] = nrow;
1241   }
1242   PetscFunctionReturnVoid();
1243 }
1244 EXTERN_C_END
1245 
1246 /*
1247      Checks for missing diagonals
1248 */
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1251 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1252 {
1253   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1254   PetscErrorCode ierr;
1255   PetscInt       *diag,*jj = a->j,i;
1256 
1257   PetscFunctionBegin;
1258   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1259   *missing = PETSC_FALSE;
1260   if (A->rmap->n > 0 && !jj) {
1261     *missing  = PETSC_TRUE;
1262     if (d) *d = 0;
1263     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1264   } else {
1265     diag     = a->diag;
1266     for (i=0; i<a->mbs; i++) {
1267       if (jj[diag[i]] != i) {
1268         *missing  = PETSC_TRUE;
1269         if (d) *d = i;
1270         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1271         break;
1272       }
1273     }
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1280 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1281 {
1282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284   PetscInt       i,j,m = a->mbs;
1285 
1286   PetscFunctionBegin;
1287   if (!a->diag) {
1288     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1289     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1290     a->free_diag = PETSC_TRUE;
1291   }
1292   for (i=0; i<m; i++) {
1293     a->diag[i] = a->i[i+1];
1294     for (j=a->i[i]; j<a->i[i+1]; j++) {
1295       if (a->j[j] == i) {
1296         a->diag[i] = j;
1297         break;
1298       }
1299     }
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (inja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353             }
1354           }
1355         }
1356       }
1357     }
1358 
1359     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1360       ierr = PetscFree(tia);CHKERRQ(ierr);
1361       ierr = PetscFree(tja);CHKERRQ(ierr);
1362     }
1363   } else if (oshift == 1) {
1364     if (symmetric) {
1365       nz = tia[A->rmap->n/bs];
1366       /*  add 1 to i and j indices */
1367       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1368       *ia = tia;
1369       if (ja) {
1370         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1371         *ja = tja;
1372       }
1373     } else {
1374       nz = a->i[A->rmap->n/bs];
1375       /* malloc space and  add 1 to i and j indices */
1376       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1377       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1378       if (ja) {
1379         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1380         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1381       }
1382     }
1383   } else {
1384     *ia = tia;
1385     if (ja) *ja = tja;
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1392 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1393 {
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   if (!ia) PetscFunctionReturn(0);
1398   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1399     ierr = PetscFree(*ia);CHKERRQ(ierr);
1400     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1407 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1408 {
1409   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413 #if defined(PETSC_USE_LOG)
1414   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1415 #endif
1416   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1417   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1418   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1419   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1420   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1421   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1422   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1423   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1424   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1425   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1426   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1427   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1428   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1429 
1430   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1431   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1432   ierr = PetscFree(A->data);CHKERRQ(ierr);
1433 
1434   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1450 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1451 {
1452   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1453   PetscErrorCode ierr;
1454 
1455   PetscFunctionBegin;
1456   switch (op) {
1457   case MAT_ROW_ORIENTED:
1458     a->roworiented    = flg;
1459     break;
1460   case MAT_KEEP_NONZERO_PATTERN:
1461     a->keepnonzeropattern = flg;
1462     break;
1463   case MAT_NEW_NONZERO_LOCATIONS:
1464     a->nonew          = (flg ? 0 : 1);
1465     break;
1466   case MAT_NEW_NONZERO_LOCATION_ERR:
1467     a->nonew          = (flg ? -1 : 0);
1468     break;
1469   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1470     a->nonew          = (flg ? -2 : 0);
1471     break;
1472   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1473     a->nounused       = (flg ? -1 : 0);
1474     break;
1475   case MAT_CHECK_COMPRESSED_ROW:
1476     a->compressedrow.check = flg;
1477     break;
1478   case MAT_NEW_DIAGONALS:
1479   case MAT_IGNORE_OFF_PROC_ENTRIES:
1480   case MAT_USE_HASH_TABLE:
1481     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1482     break;
1483   case MAT_SPD:
1484   case MAT_SYMMETRIC:
1485   case MAT_STRUCTURALLY_SYMMETRIC:
1486   case MAT_HERMITIAN:
1487   case MAT_SYMMETRY_ETERNAL:
1488     /* These options are handled directly by MatSetOption() */
1489     break;
1490   default:
1491     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1498 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1499 {
1500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1501   PetscErrorCode ierr;
1502   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1503   MatScalar      *aa,*aa_i;
1504   PetscScalar    *v_i;
1505 
1506   PetscFunctionBegin;
1507   bs  = A->rmap->bs;
1508   ai  = a->i;
1509   aj  = a->j;
1510   aa  = a->a;
1511   bs2 = a->bs2;
1512 
1513   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1514 
1515   bn  = row/bs;   /* Block number */
1516   bp  = row % bs; /* Block Position */
1517   M   = ai[bn+1] - ai[bn];
1518   *nz = bs*M;
1519 
1520   if (v) {
1521     *v = 0;
1522     if (*nz) {
1523       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1524       for (i=0; i<M; i++) { /* for each block in the block row */
1525         v_i  = *v + i*bs;
1526         aa_i = aa + bs2*(ai[bn] + i);
1527         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1528       }
1529     }
1530   }
1531 
1532   if (idx) {
1533     *idx = 0;
1534     if (*nz) {
1535       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1536       for (i=0; i<M; i++) { /* for each block in the block row */
1537         idx_i = *idx + i*bs;
1538         itmp  = bs*aj[ai[bn] + i];
1539         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1540       }
1541     }
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1548 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1554   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1562 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1563 {
1564   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1565   Mat            C;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1568   PetscInt       *rows,*cols,bs2=a->bs2;
1569   MatScalar      *array;
1570 
1571   PetscFunctionBegin;
1572   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1573   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1574     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1575     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1576 
1577     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1578     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1579     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1580     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1581     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1582     ierr = PetscFree(col);CHKERRQ(ierr);
1583   } else {
1584     C = *B;
1585   }
1586 
1587   array = a->a;
1588   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1589   for (i=0; i<mbs; i++) {
1590     cols[0] = i*bs;
1591     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1592     len = ai[i+1] - ai[i];
1593     for (j=0; j<len; j++) {
1594       rows[0] = (*aj++)*bs;
1595       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1596       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1597       array += bs2;
1598     }
1599   }
1600   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1601 
1602   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1603   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1604 
1605   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1606     *B = C;
1607   } else {
1608     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 EXTERN_C_BEGIN
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1616 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1617 {
1618   PetscErrorCode ierr;
1619   Mat            Btrans;
1620 
1621   PetscFunctionBegin;
1622   *f = PETSC_FALSE;
1623   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1624   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1625   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 EXTERN_C_END
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1632 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1633 {
1634   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1635   PetscErrorCode ierr;
1636   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1637   int            fd;
1638   PetscScalar    *aa;
1639   FILE           *file;
1640 
1641   PetscFunctionBegin;
1642   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1643   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1644   col_lens[0] = MAT_FILE_CLASSID;
1645 
1646   col_lens[1] = A->rmap->N;
1647   col_lens[2] = A->cmap->n;
1648   col_lens[3] = a->nz*bs2;
1649 
1650   /* store lengths of each row and write (including header) to file */
1651   count = 0;
1652   for (i=0; i<a->mbs; i++) {
1653     for (j=0; j<bs; j++) {
1654       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1655     }
1656   }
1657   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1658   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1659 
1660   /* store column indices (zero start index) */
1661   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       for (k=a->i[i]; k<a->i[i+1]; k++) {
1666         for (l=0; l<bs; l++) {
1667           jj[count++] = bs*a->j[k] + l;
1668         }
1669       }
1670     }
1671   }
1672   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1673   ierr = PetscFree(jj);CHKERRQ(ierr);
1674 
1675   /* store nonzero values */
1676   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1677   count = 0;
1678   for (i=0; i<a->mbs; i++) {
1679     for (j=0; j<bs; j++) {
1680       for (k=a->i[i]; k<a->i[i+1]; k++) {
1681         for (l=0; l<bs; l++) {
1682           aa[count++] = a->a[bs2*k + l*bs + j];
1683         }
1684       }
1685     }
1686   }
1687   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1688   ierr = PetscFree(aa);CHKERRQ(ierr);
1689 
1690   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1691   if (file) {
1692     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1699 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1700 {
1701   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1702   PetscErrorCode    ierr;
1703   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1704   PetscViewerFormat format;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1708   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1709     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1710   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1711     Mat aij;
1712     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1713     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1714     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1715   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1716      PetscFunctionReturn(0);
1717   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1718     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1719     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1720     for (i=0; i<a->mbs; i++) {
1721       for (j=0; j<bs; j++) {
1722         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1723         for (k=a->i[i]; k<a->i[i+1]; k++) {
1724           for (l=0; l<bs; l++) {
1725 #if defined(PETSC_USE_COMPLEX)
1726             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1727               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1728                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1729             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1731                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             }
1735 #else
1736             if (a->a[bs2*k + l*bs + j] != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1738             }
1739 #endif
1740           }
1741         }
1742         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1743       }
1744     }
1745     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1746   } else {
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1748     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1749     for (i=0; i<a->mbs; i++) {
1750       for (j=0; j<bs; j++) {
1751         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1752         for (k=a->i[i]; k<a->i[i+1]; k++) {
1753           for (l=0; l<bs; l++) {
1754 #if defined(PETSC_USE_COMPLEX)
1755             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1756               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1757                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1758             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1760                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             } else {
1762               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             }
1764 #else
1765             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1766 #endif
1767           }
1768         }
1769         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1770       }
1771     }
1772     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1773   }
1774   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1780 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1781 {
1782   Mat            A = (Mat) Aa;
1783   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1784   PetscErrorCode ierr;
1785   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1786   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1787   MatScalar      *aa;
1788   PetscViewer    viewer;
1789   PetscViewerFormat format;
1790 
1791   PetscFunctionBegin;
1792   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1793   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1794 
1795   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1796 
1797   /* loop over matrix elements drawing boxes */
1798 
1799   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1800     color = PETSC_DRAW_BLUE;
1801     for (i=0,row=0; i<mbs; i++,row+=bs) {
1802       for (j=a->i[i]; j<a->i[i+1]; j++) {
1803         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1804         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1805         aa = a->a + j*bs2;
1806         for (k=0; k<bs; k++) {
1807           for (l=0; l<bs; l++) {
1808             if (PetscRealPart(*aa++) >=  0.) continue;
1809             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1810           }
1811         }
1812       }
1813     }
1814     color = PETSC_DRAW_CYAN;
1815     for (i=0,row=0; i<mbs; i++,row+=bs) {
1816       for (j=a->i[i]; j<a->i[i+1]; j++) {
1817         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1818         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1819         aa = a->a + j*bs2;
1820         for (k=0; k<bs; k++) {
1821           for (l=0; l<bs; l++) {
1822             if (PetscRealPart(*aa++) != 0.) continue;
1823             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1824           }
1825         }
1826       }
1827     }
1828     color = PETSC_DRAW_RED;
1829     for (i=0,row=0; i<mbs; i++,row+=bs) {
1830       for (j=a->i[i]; j<a->i[i+1]; j++) {
1831         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1832         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1833         aa = a->a + j*bs2;
1834         for (k=0; k<bs; k++) {
1835           for (l=0; l<bs; l++) {
1836             if (PetscRealPart(*aa++) <= 0.) continue;
1837             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1838           }
1839         }
1840       }
1841     }
1842   } else {
1843     /* use contour shading to indicate magnitude of values */
1844     /* first determine max of all nonzero values */
1845     PetscDraw   popup;
1846     PetscReal scale,maxv = 0.0;
1847 
1848     for (i=0; i<a->nz*a->bs2; i++) {
1849       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1850     }
1851     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1852     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1853     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1854     for (i=0,row=0; i<mbs; i++,row+=bs) {
1855       for (j=a->i[i]; j<a->i[i+1]; j++) {
1856         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1857         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1858         aa = a->a + j*bs2;
1859         for (k=0; k<bs; k++) {
1860           for (l=0; l<bs; l++) {
1861             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1862             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1863           }
1864         }
1865       }
1866     }
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1873 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1874 {
1875   PetscErrorCode ierr;
1876   PetscReal      xl,yl,xr,yr,w,h;
1877   PetscDraw      draw;
1878   PetscBool      isnull;
1879 
1880   PetscFunctionBegin;
1881   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1882   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1883 
1884   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1885   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1886   xr += w;    yr += h;  xl = -w;     yl = -h;
1887   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1888   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1889   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatView_SeqBAIJ"
1895 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1896 {
1897   PetscErrorCode ierr;
1898   PetscBool      iascii,isbinary,isdraw;
1899 
1900   PetscFunctionBegin;
1901   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1902   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1903   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1904   if (iascii) {
1905     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1906   } else if (isbinary) {
1907     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1908   } else if (isdraw) {
1909     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1910   } else {
1911     Mat B;
1912     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1913     ierr = MatView(B,viewer);CHKERRQ(ierr);
1914     ierr = MatDestroy(&B);CHKERRQ(ierr);
1915   }
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1922 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1923 {
1924   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1925   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1926   PetscInt    *ai = a->i,*ailen = a->ilen;
1927   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1928   MatScalar   *ap,*aa = a->a;
1929 
1930   PetscFunctionBegin;
1931   for (k=0; k<m; k++) { /* loop over rows */
1932     row  = im[k]; brow = row/bs;
1933     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1934     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1935     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1936     nrow = ailen[brow];
1937     for (l=0; l<n; l++) { /* loop over columns */
1938       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1939       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1940       col  = in[l] ;
1941       bcol = col/bs;
1942       cidx = col%bs;
1943       ridx = row%bs;
1944       high = nrow;
1945       low  = 0; /* assume unsorted */
1946       while (high-low > 5) {
1947         t = (low+high)/2;
1948         if (rp[t] > bcol) high = t;
1949         else             low  = t;
1950       }
1951       for (i=low; i<high; i++) {
1952         if (rp[i] > bcol) break;
1953         if (rp[i] == bcol) {
1954           *v++ = ap[bs2*i+bs*cidx+ridx];
1955           goto finished;
1956         }
1957       }
1958       *v++ = 0.0;
1959       finished:;
1960     }
1961   }
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1967 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1968 {
1969   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1970   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1971   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1972   PetscErrorCode    ierr;
1973   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1974   PetscBool         roworiented=a->roworiented;
1975   const PetscScalar *value = v;
1976   MatScalar         *ap,*aa = a->a,*bap;
1977 
1978   PetscFunctionBegin;
1979   if (roworiented) {
1980     stepval = (n-1)*bs;
1981   } else {
1982     stepval = (m-1)*bs;
1983   }
1984   for (k=0; k<m; k++) { /* loop over added rows */
1985     row  = im[k];
1986     if (row < 0) continue;
1987 #if defined(PETSC_USE_DEBUG)
1988     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1989 #endif
1990     rp   = aj + ai[row];
1991     ap   = aa + bs2*ai[row];
1992     rmax = imax[row];
1993     nrow = ailen[row];
1994     low  = 0;
1995     high = nrow;
1996     for (l=0; l<n; l++) { /* loop over added columns */
1997       if (in[l] < 0) continue;
1998 #if defined(PETSC_USE_DEBUG)
1999       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2000 #endif
2001       col = in[l];
2002       if (roworiented) {
2003         value = v + (k*(stepval+bs) + l)*bs;
2004       } else {
2005         value = v + (l*(stepval+bs) + k)*bs;
2006       }
2007       if (col <= lastcol) low = 0; else high = nrow;
2008       lastcol = col;
2009       while (high-low > 7) {
2010         t = (low+high)/2;
2011         if (rp[t] > col) high = t;
2012         else             low  = t;
2013       }
2014       for (i=low; i<high; i++) {
2015         if (rp[i] > col) break;
2016         if (rp[i] == col) {
2017           bap  = ap +  bs2*i;
2018           if (roworiented) {
2019             if (is == ADD_VALUES) {
2020               for (ii=0; ii<bs; ii++,value+=stepval) {
2021                 for (jj=ii; jj<bs2; jj+=bs) {
2022                   bap[jj] += *value++;
2023                 }
2024               }
2025             } else {
2026               for (ii=0; ii<bs; ii++,value+=stepval) {
2027                 for (jj=ii; jj<bs2; jj+=bs) {
2028                   bap[jj] = *value++;
2029                 }
2030               }
2031             }
2032           } else {
2033             if (is == ADD_VALUES) {
2034               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2035                 for (jj=0; jj<bs; jj++) {
2036                   bap[jj] += value[jj];
2037                 }
2038                 bap += bs;
2039               }
2040             } else {
2041               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2042                 for (jj=0; jj<bs; jj++) {
2043                   bap[jj]  = value[jj];
2044                 }
2045                 bap += bs;
2046               }
2047             }
2048           }
2049           goto noinsert2;
2050         }
2051       }
2052       if (nonew == 1) goto noinsert2;
2053       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2054       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2055       N = nrow++ - 1; high++;
2056       /* shift up all the later entries in this row */
2057       for (ii=N; ii>=i; ii--) {
2058         rp[ii+1] = rp[ii];
2059         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2060       }
2061       if (N >= i) {
2062         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2063       }
2064       rp[i] = col;
2065       bap   = ap +  bs2*i;
2066       if (roworiented) {
2067         for (ii=0; ii<bs; ii++,value+=stepval) {
2068           for (jj=ii; jj<bs2; jj+=bs) {
2069             bap[jj] = *value++;
2070           }
2071         }
2072       } else {
2073         for (ii=0; ii<bs; ii++,value+=stepval) {
2074           for (jj=0; jj<bs; jj++) {
2075             *bap++  = *value++;
2076           }
2077         }
2078       }
2079       noinsert2:;
2080       low = i;
2081     }
2082     ailen[row] = nrow;
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2089 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2090 {
2091   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2092   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2093   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2094   PetscErrorCode ierr;
2095   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2096   MatScalar      *aa = a->a,*ap;
2097   PetscReal      ratio=0.6;
2098 
2099   PetscFunctionBegin;
2100   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2101 
2102   if (m) rmax = ailen[0];
2103   for (i=1; i<mbs; i++) {
2104     /* move each row back by the amount of empty slots (fshift) before it*/
2105     fshift += imax[i-1] - ailen[i-1];
2106     rmax   = PetscMax(rmax,ailen[i]);
2107     if (fshift) {
2108       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2109       N = ailen[i];
2110       for (j=0; j<N; j++) {
2111         ip[j-fshift] = ip[j];
2112         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2113       }
2114     }
2115     ai[i] = ai[i-1] + ailen[i-1];
2116   }
2117   if (mbs) {
2118     fshift += imax[mbs-1] - ailen[mbs-1];
2119     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2120   }
2121   /* reset ilen and imax for each row */
2122   for (i=0; i<mbs; i++) {
2123     ailen[i] = imax[i] = ai[i+1] - ai[i];
2124   }
2125   a->nz = ai[mbs];
2126 
2127   /* diagonals may have moved, so kill the diagonal pointers */
2128   a->idiagvalid = PETSC_FALSE;
2129   if (fshift && a->diag) {
2130     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2131     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2132     a->diag = 0;
2133   }
2134   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2135   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2136   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2137   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2138   A->info.mallocs     += a->reallocs;
2139   a->reallocs          = 0;
2140   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2141 
2142   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2143   A->same_nonzero = PETSC_TRUE;
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 /*
2148    This function returns an array of flags which indicate the locations of contiguous
2149    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2150    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2151    Assume: sizes should be long enough to hold all the values.
2152 */
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2155 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2156 {
2157   PetscInt   i,j,k,row;
2158   PetscBool  flg;
2159 
2160   PetscFunctionBegin;
2161   for (i=0,j=0; i<n; j++) {
2162     row = idx[i];
2163     if (row%bs!=0) { /* Not the begining of a block */
2164       sizes[j] = 1;
2165       i++;
2166     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2167       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2168       i++;
2169     } else { /* Begining of the block, so check if the complete block exists */
2170       flg = PETSC_TRUE;
2171       for (k=1; k<bs; k++) {
2172         if (row+k != idx[i+k]) { /* break in the block */
2173           flg = PETSC_FALSE;
2174           break;
2175         }
2176       }
2177       if (flg) { /* No break in the bs */
2178         sizes[j] = bs;
2179         i+= bs;
2180       } else {
2181         sizes[j] = 1;
2182         i++;
2183       }
2184     }
2185   }
2186   *bs_max = j;
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2192 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2193 {
2194   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2195   PetscErrorCode    ierr;
2196   PetscInt          i,j,k,count,*rows;
2197   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2198   PetscScalar       zero = 0.0;
2199   MatScalar         *aa;
2200   const PetscScalar *xx;
2201   PetscScalar       *bb;
2202 
2203   PetscFunctionBegin;
2204   /* fix right hand side if needed */
2205   if (x && b) {
2206     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2207     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2208     for (i=0; i<is_n; i++) {
2209       bb[is_idx[i]] = diag*xx[is_idx[i]];
2210     }
2211     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2212     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2213   }
2214 
2215   /* Make a copy of the IS and  sort it */
2216   /* allocate memory for rows,sizes */
2217   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2218 
2219   /* copy IS values to rows, and sort them */
2220   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2221   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2222 
2223   if (baij->keepnonzeropattern) {
2224     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2225     bs_max = is_n;
2226     A->same_nonzero = PETSC_TRUE;
2227   } else {
2228     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2229     A->same_nonzero = PETSC_FALSE;
2230   }
2231 
2232   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2233     row   = rows[j];
2234     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2235     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2236     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2237     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2238       if (diag != (PetscScalar)0.0) {
2239         if (baij->ilen[row/bs] > 0) {
2240           baij->ilen[row/bs]       = 1;
2241           baij->j[baij->i[row/bs]] = row/bs;
2242           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2243         }
2244         /* Now insert all the diagonal values for this bs */
2245         for (k=0; k<bs; k++) {
2246           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2247         }
2248       } else { /* (diag == 0.0) */
2249         baij->ilen[row/bs] = 0;
2250       } /* end (diag == 0.0) */
2251     } else { /* (sizes[i] != bs) */
2252 #if defined (PETSC_USE_DEBUG)
2253       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2254 #endif
2255       for (k=0; k<count; k++) {
2256         aa[0] =  zero;
2257         aa    += bs;
2258       }
2259       if (diag != (PetscScalar)0.0) {
2260         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2261       }
2262     }
2263   }
2264 
2265   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2266   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2272 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2273 {
2274   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2275   PetscErrorCode    ierr;
2276   PetscInt          i,j,k,count;
2277   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2278   PetscScalar       zero = 0.0;
2279   MatScalar         *aa;
2280   const PetscScalar *xx;
2281   PetscScalar       *bb;
2282   PetscBool         *zeroed,vecs = PETSC_FALSE;
2283 
2284   PetscFunctionBegin;
2285   /* fix right hand side if needed */
2286   if (x && b) {
2287     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2288     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2289     vecs = PETSC_TRUE;
2290   }
2291   A->same_nonzero = PETSC_TRUE;
2292 
2293   /* zero the columns */
2294   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2295   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2296   for (i=0; i<is_n; i++) {
2297     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2298     zeroed[is_idx[i]] = PETSC_TRUE;
2299   }
2300   for (i=0; i<A->rmap->N; i++) {
2301     if (!zeroed[i]) {
2302       row = i/bs;
2303       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2304         for (k=0; k<bs; k++) {
2305           col = bs*baij->j[j] + k;
2306           if (zeroed[col]) {
2307             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2308             if (vecs) bb[i] -= aa[0]*xx[col];
2309             aa[0] = 0.0;
2310           }
2311         }
2312       }
2313     } else if (vecs) bb[i] = diag*xx[i];
2314   }
2315   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2316   if (vecs) {
2317     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2318     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2319   }
2320 
2321   /* zero the rows */
2322   for (i=0; i<is_n; i++) {
2323     row   = is_idx[i];
2324     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2325     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2326     for (k=0; k<count; k++) {
2327       aa[0] =  zero;
2328       aa    += bs;
2329     }
2330     if (diag != (PetscScalar)0.0) {
2331       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2332     }
2333   }
2334   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2340 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2341 {
2342   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2343   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2344   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2345   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2346   PetscErrorCode ierr;
2347   PetscInt       ridx,cidx,bs2=a->bs2;
2348   PetscBool      roworiented=a->roworiented;
2349   MatScalar      *ap,value,*aa=a->a,*bap;
2350 
2351   PetscFunctionBegin;
2352   if (v) PetscValidScalarPointer(v,6);
2353   for (k=0; k<m; k++) { /* loop over added rows */
2354     row  = im[k];
2355     brow = row/bs;
2356     if (row < 0) continue;
2357 #if defined(PETSC_USE_DEBUG)
2358     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2359 #endif
2360     rp   = aj + ai[brow];
2361     ap   = aa + bs2*ai[brow];
2362     rmax = imax[brow];
2363     nrow = ailen[brow];
2364     low  = 0;
2365     high = nrow;
2366     for (l=0; l<n; l++) { /* loop over added columns */
2367       if (in[l] < 0) continue;
2368 #if defined(PETSC_USE_DEBUG)
2369       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2370 #endif
2371       col = in[l]; bcol = col/bs;
2372       ridx = row % bs; cidx = col % bs;
2373       if (roworiented) {
2374         value = v[l + k*n];
2375       } else {
2376         value = v[k + l*m];
2377       }
2378       if (col <= lastcol) low = 0; else high = nrow;
2379       lastcol = col;
2380       while (high-low > 7) {
2381         t = (low+high)/2;
2382         if (rp[t] > bcol) high = t;
2383         else              low  = t;
2384       }
2385       for (i=low; i<high; i++) {
2386         if (rp[i] > bcol) break;
2387         if (rp[i] == bcol) {
2388           bap  = ap +  bs2*i + bs*cidx + ridx;
2389           if (is == ADD_VALUES) *bap += value;
2390           else                  *bap  = value;
2391           goto noinsert1;
2392         }
2393       }
2394       if (nonew == 1) goto noinsert1;
2395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2396       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2397       N = nrow++ - 1; high++;
2398       /* shift up all the later entries in this row */
2399       for (ii=N; ii>=i; ii--) {
2400         rp[ii+1] = rp[ii];
2401         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2402       }
2403       if (N>=i) {
2404         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2405       }
2406       rp[i]                      = bcol;
2407       ap[bs2*i + bs*cidx + ridx] = value;
2408       a->nz++;
2409       noinsert1:;
2410       low = i;
2411     }
2412     ailen[brow] = nrow;
2413   }
2414   A->same_nonzero = PETSC_FALSE;
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2420 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2421 {
2422   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2423   Mat            outA;
2424   PetscErrorCode ierr;
2425   PetscBool      row_identity,col_identity;
2426 
2427   PetscFunctionBegin;
2428   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2429   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2430   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2431   if (!row_identity || !col_identity) {
2432     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2433   }
2434 
2435   outA            = inA;
2436   inA->factortype = MAT_FACTOR_LU;
2437 
2438   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2439 
2440   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2441   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2442   a->row = row;
2443   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2444   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2445   a->col = col;
2446 
2447   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2448   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2449    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2450   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2451 
2452   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2453   if (!a->solve_work) {
2454     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2455     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2456   }
2457   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2458 
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 EXTERN_C_BEGIN
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2465 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2466 {
2467   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2468   PetscInt    i,nz,mbs;
2469 
2470   PetscFunctionBegin;
2471   nz  = baij->maxnz;
2472   mbs = baij->mbs;
2473   for (i=0; i<nz; i++) {
2474     baij->j[i] = indices[i];
2475   }
2476   baij->nz = nz;
2477   for (i=0; i<mbs; i++) {
2478     baij->ilen[i] = baij->imax[i];
2479   }
2480   PetscFunctionReturn(0);
2481 }
2482 EXTERN_C_END
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2486 /*@
2487     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2488        in the matrix.
2489 
2490   Input Parameters:
2491 +  mat - the SeqBAIJ matrix
2492 -  indices - the column indices
2493 
2494   Level: advanced
2495 
2496   Notes:
2497     This can be called if you have precomputed the nonzero structure of the
2498   matrix and want to provide it to the matrix object to improve the performance
2499   of the MatSetValues() operation.
2500 
2501     You MUST have set the correct numbers of nonzeros per row in the call to
2502   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2503 
2504     MUST be called before any calls to MatSetValues();
2505 
2506 @*/
2507 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2508 {
2509   PetscErrorCode ierr;
2510 
2511   PetscFunctionBegin;
2512   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2513   PetscValidPointer(indices,2);
2514   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2520 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2521 {
2522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2523   PetscErrorCode ierr;
2524   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2525   PetscReal      atmp;
2526   PetscScalar    *x,zero = 0.0;
2527   MatScalar      *aa;
2528   PetscInt       ncols,brow,krow,kcol;
2529 
2530   PetscFunctionBegin;
2531   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2532   bs   = A->rmap->bs;
2533   aa   = a->a;
2534   ai   = a->i;
2535   aj   = a->j;
2536   mbs  = a->mbs;
2537 
2538   ierr = VecSet(v,zero);CHKERRQ(ierr);
2539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2540   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2541   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2542   for (i=0; i<mbs; i++) {
2543     ncols = ai[1] - ai[0]; ai++;
2544     brow  = bs*i;
2545     for (j=0; j<ncols; j++) {
2546       for (kcol=0; kcol<bs; kcol++) {
2547         for (krow=0; krow<bs; krow++) {
2548           atmp = PetscAbsScalar(*aa);aa++;
2549           row = brow + krow;    /* row index */
2550           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2551           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2552         }
2553       }
2554       aj++;
2555     }
2556   }
2557   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatCopy_SeqBAIJ"
2563 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2564 {
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   /* If the two matrices have the same copy implementation, use fast copy. */
2569   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2570     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2571     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2572     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2573 
2574     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2575     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2576     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2577   } else {
2578     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2579   }
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 #undef __FUNCT__
2584 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2585 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2586 {
2587   PetscErrorCode ierr;
2588 
2589   PetscFunctionBegin;
2590   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 #undef __FUNCT__
2595 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2596 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2597 {
2598   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2599 
2600   PetscFunctionBegin;
2601   *array = a->a;
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2607 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2608 {
2609   PetscFunctionBegin;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2615 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2616 {
2617   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2618   PetscErrorCode ierr;
2619   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2620   PetscBLASInt   one=1;
2621 
2622   PetscFunctionBegin;
2623   if (str == SAME_NONZERO_PATTERN) {
2624     PetscScalar alpha = a;
2625     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2626     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2627   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2628     if (y->xtoy && y->XtoY != X) {
2629       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2630       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2631     }
2632     if (!y->xtoy) { /* get xtoy */
2633       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2634       y->XtoY = X;
2635       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2636     }
2637     for (i=0; i<x->nz; i++) {
2638       j = 0;
2639       while (j < bs2) {
2640         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2641         j++;
2642       }
2643     }
2644     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2645   } else {
2646     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2647   }
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2653 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2654 {
2655   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2656   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2657   MatScalar      *aa = a->a;
2658 
2659   PetscFunctionBegin;
2660   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2666 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2667 {
2668   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2669   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2670   MatScalar      *aa = a->a;
2671 
2672   PetscFunctionBegin;
2673   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2681 /*
2682     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2683 */
2684 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2685 {
2686   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2687   PetscErrorCode ierr;
2688   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2689   PetscInt       nz = a->i[m],row,*jj,mr,col;
2690 
2691   PetscFunctionBegin;
2692   *nn = n;
2693   if (!ia) PetscFunctionReturn(0);
2694   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2695   else {
2696     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2697     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2698     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2699     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2700     jj = a->j;
2701     for (i=0; i<nz; i++) {
2702       collengths[jj[i]]++;
2703     }
2704     cia[0] = oshift;
2705     for (i=0; i<n; i++) {
2706       cia[i+1] = cia[i] + collengths[i];
2707     }
2708     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2709     jj   = a->j;
2710     for (row=0; row<m; row++) {
2711       mr = a->i[row+1] - a->i[row];
2712       for (i=0; i<mr; i++) {
2713         col = *jj++;
2714         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2715       }
2716     }
2717     ierr = PetscFree(collengths);CHKERRQ(ierr);
2718     *ia = cia; *ja = cja;
2719   }
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2725 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2726 {
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   if (!ia) PetscFunctionReturn(0);
2731   ierr = PetscFree(*ia);CHKERRQ(ierr);
2732   ierr = PetscFree(*ja);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2738 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2739 {
2740   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2741   PetscErrorCode ierr;
2742   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2743   PetscScalar    dx,*y,*xx,*w3_array;
2744   PetscScalar    *vscale_array;
2745   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2746   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2747   void           *fctx = coloring->fctx;
2748   PetscBool      flg = PETSC_FALSE;
2749   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2750   Vec            x1_tmp;
2751 
2752   PetscFunctionBegin;
2753   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2754   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2755   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2756   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2757 
2758   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2759   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2760   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2761   if (flg) {
2762     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2763   } else {
2764     PetscBool  assembled;
2765     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2766     if (assembled) {
2767       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2768     }
2769   }
2770 
2771   x1_tmp = x1;
2772   if (!coloring->vscale) {
2773     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2774   }
2775 
2776   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2777     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2778   }
2779   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2780 
2781   /* Set w1 = F(x1) */
2782   if (!coloring->fset) {
2783     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2784     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2785     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2786   } else {
2787     coloring->fset = PETSC_FALSE;
2788   }
2789 
2790   if (!coloring->w3) {
2791     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2793   }
2794   w3 = coloring->w3;
2795 
2796     CHKMEMQ;
2797     /* Compute all the local scale factors, including ghost points */
2798   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2799   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2800   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2801   if (ctype == IS_COLORING_GHOSTED) {
2802     col_start = 0; col_end = N;
2803   } else if (ctype == IS_COLORING_GLOBAL) {
2804     xx = xx - start;
2805     vscale_array = vscale_array - start;
2806     col_start = start; col_end = N + start;
2807   }    CHKMEMQ;
2808   for (col=col_start; col<col_end; col++) {
2809     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2810     if (coloring->htype[0] == 'w') {
2811       dx = 1.0 + unorm;
2812     } else {
2813       dx  = xx[col];
2814     }
2815     if (dx == (PetscScalar)0.0) dx = 1.0;
2816     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2817     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2818     dx               *= epsilon;
2819     vscale_array[col] = (PetscScalar)1.0/dx;
2820   }     CHKMEMQ;
2821   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2822   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2823   if (ctype == IS_COLORING_GLOBAL) {
2824     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2825     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2826   }
2827   CHKMEMQ;
2828   if (coloring->vscaleforrow) {
2829     vscaleforrow = coloring->vscaleforrow;
2830   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2831 
2832   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2833   /*
2834     Loop over each color
2835   */
2836   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2837   for (k=0; k<coloring->ncolors; k++) {
2838     coloring->currentcolor = k;
2839     for (i=0; i<bs; i++) {
2840       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2841       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2842       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2843       /*
2844         Loop over each column associated with color
2845         adding the perturbation to the vector w3.
2846       */
2847       for (l=0; l<coloring->ncolumns[k]; l++) {
2848         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2849         if (coloring->htype[0] == 'w') {
2850           dx = 1.0 + unorm;
2851         } else {
2852           dx  = xx[col];
2853         }
2854         if (dx == (PetscScalar)0.0) dx = 1.0;
2855         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2856         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2857         dx            *= epsilon;
2858         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2859         w3_array[col] += dx;
2860       }
2861       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2862       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2863 
2864       /*
2865         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2866         w2 = F(x1 + dx) - F(x1)
2867       */
2868       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2869       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2870       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2871       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2872 
2873       /*
2874         Loop over rows of vector, putting results into Jacobian matrix
2875       */
2876       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2877       for (l=0; l<coloring->nrows[k]; l++) {
2878         row    = bs*coloring->rows[k][l];             /* local row index */
2879         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2880         for (j=0; j<bs; j++) {
2881           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2882           srows[j]  = row + start + j;
2883         }
2884         ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2885       }
2886       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2887     }
2888   } /* endof for each color */
2889   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2890   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2892   ierr = PetscFree(srows);CHKERRQ(ierr);
2893 
2894   coloring->currentcolor = -1;
2895   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2896   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2897   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 /* -------------------------------------------------------------------*/
2902 static struct _MatOps MatOps_Values = {
2903         MatSetValues_SeqBAIJ,
2904         MatGetRow_SeqBAIJ,
2905         MatRestoreRow_SeqBAIJ,
2906         MatMult_SeqBAIJ_N,
2907 /* 4*/  MatMultAdd_SeqBAIJ_N,
2908         MatMultTranspose_SeqBAIJ,
2909         MatMultTransposeAdd_SeqBAIJ,
2910         0,
2911         0,
2912         0,
2913 /* 10*/ 0,
2914         MatLUFactor_SeqBAIJ,
2915         0,
2916         0,
2917         MatTranspose_SeqBAIJ,
2918 /* 15*/ MatGetInfo_SeqBAIJ,
2919         MatEqual_SeqBAIJ,
2920         MatGetDiagonal_SeqBAIJ,
2921         MatDiagonalScale_SeqBAIJ,
2922         MatNorm_SeqBAIJ,
2923 /* 20*/ 0,
2924         MatAssemblyEnd_SeqBAIJ,
2925         MatSetOption_SeqBAIJ,
2926         MatZeroEntries_SeqBAIJ,
2927 /* 24*/ MatZeroRows_SeqBAIJ,
2928         0,
2929         0,
2930         0,
2931         0,
2932 /* 29*/ MatSetUp_SeqBAIJ,
2933         0,
2934         0,
2935         0,
2936         0,
2937 /* 34*/ MatDuplicate_SeqBAIJ,
2938         0,
2939         0,
2940         MatILUFactor_SeqBAIJ,
2941         0,
2942 /* 39*/ MatAXPY_SeqBAIJ,
2943         MatGetSubMatrices_SeqBAIJ,
2944         MatIncreaseOverlap_SeqBAIJ,
2945         MatGetValues_SeqBAIJ,
2946         MatCopy_SeqBAIJ,
2947 /* 44*/ 0,
2948         MatScale_SeqBAIJ,
2949         0,
2950         0,
2951         MatZeroRowsColumns_SeqBAIJ,
2952 /* 49*/ 0,
2953         MatGetRowIJ_SeqBAIJ,
2954         MatRestoreRowIJ_SeqBAIJ,
2955         MatGetColumnIJ_SeqBAIJ,
2956         MatRestoreColumnIJ_SeqBAIJ,
2957 /* 54*/ MatFDColoringCreate_SeqAIJ,
2958         0,
2959         0,
2960         0,
2961         MatSetValuesBlocked_SeqBAIJ,
2962 /* 59*/ MatGetSubMatrix_SeqBAIJ,
2963         MatDestroy_SeqBAIJ,
2964         MatView_SeqBAIJ,
2965         0,
2966         0,
2967 /* 64*/ 0,
2968         0,
2969         0,
2970         0,
2971         0,
2972 /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2973         0,
2974         MatConvert_Basic,
2975         0,
2976         0,
2977 /* 74*/ 0,
2978         MatFDColoringApply_BAIJ,
2979         0,
2980         0,
2981         0,
2982 /* 79*/ 0,
2983         0,
2984         0,
2985         0,
2986         MatLoad_SeqBAIJ,
2987 /* 84*/ 0,
2988         0,
2989         0,
2990         0,
2991         0,
2992 /* 89*/ 0,
2993         0,
2994         0,
2995         0,
2996         0,
2997 /* 94*/ 0,
2998         0,
2999         0,
3000         0,
3001         0,
3002 /* 99*/ 0,
3003         0,
3004         0,
3005         0,
3006         0,
3007 /*104*/ 0,
3008         MatRealPart_SeqBAIJ,
3009         MatImaginaryPart_SeqBAIJ,
3010         0,
3011         0,
3012 /*109*/ 0,
3013         0,
3014         0,
3015         0,
3016         MatMissingDiagonal_SeqBAIJ,
3017 /*114*/ 0,
3018         0,
3019         0,
3020         0,
3021         0,
3022 /*119*/ 0,
3023         0,
3024         MatMultHermitianTranspose_SeqBAIJ,
3025         MatMultHermitianTransposeAdd_SeqBAIJ,
3026         0,
3027 /*124*/ 0,
3028         0,
3029         MatInvertBlockDiagonal_SeqBAIJ
3030 };
3031 
3032 EXTERN_C_BEGIN
3033 #undef __FUNCT__
3034 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3035 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3036 {
3037   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3038   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3039   PetscErrorCode ierr;
3040 
3041   PetscFunctionBegin;
3042   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3043 
3044   /* allocate space for values if not already there */
3045   if (!aij->saved_values) {
3046     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3047     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3048   }
3049 
3050   /* copy values over */
3051   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 EXTERN_C_END
3055 
3056 EXTERN_C_BEGIN
3057 #undef __FUNCT__
3058 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3059 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3060 {
3061   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3062   PetscErrorCode ierr;
3063   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3064 
3065   PetscFunctionBegin;
3066   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3067   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3068 
3069   /* copy values over */
3070   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3071   PetscFunctionReturn(0);
3072 }
3073 EXTERN_C_END
3074 
3075 EXTERN_C_BEGIN
3076 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3077 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3078 EXTERN_C_END
3079 
3080 EXTERN_C_BEGIN
3081 #undef __FUNCT__
3082 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3083 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3084 {
3085   Mat_SeqBAIJ    *b;
3086   PetscErrorCode ierr;
3087   PetscInt       i,mbs,nbs,bs2;
3088   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3089 
3090   PetscFunctionBegin;
3091   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3092   if (nz == MAT_SKIP_ALLOCATION) {
3093     skipallocation = PETSC_TRUE;
3094     nz             = 0;
3095   }
3096 
3097   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3098   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3099   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3100   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3101   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3102 
3103   B->preallocated = PETSC_TRUE;
3104 
3105   mbs  = B->rmap->n/bs;
3106   nbs  = B->cmap->n/bs;
3107   bs2  = bs*bs;
3108 
3109   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3110 
3111   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3112   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3113   if (nnz) {
3114     for (i=0; i<mbs; i++) {
3115       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3116       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3117     }
3118   }
3119 
3120   b       = (Mat_SeqBAIJ*)B->data;
3121   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3122     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3123   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3124 
3125   if (!flg) {
3126     switch (bs) {
3127     case 1:
3128       B->ops->mult            = MatMult_SeqBAIJ_1;
3129       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3130       B->ops->sor             = MatSOR_SeqBAIJ_1;
3131       break;
3132     case 2:
3133       B->ops->mult            = MatMult_SeqBAIJ_2;
3134       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3135       B->ops->sor             = MatSOR_SeqBAIJ_2;
3136       break;
3137     case 3:
3138       B->ops->mult            = MatMult_SeqBAIJ_3;
3139       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3140       B->ops->sor             = MatSOR_SeqBAIJ_3;
3141       break;
3142     case 4:
3143       B->ops->mult            = MatMult_SeqBAIJ_4;
3144       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3145       B->ops->sor             = MatSOR_SeqBAIJ_4;
3146       break;
3147     case 5:
3148       B->ops->mult            = MatMult_SeqBAIJ_5;
3149       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3150       B->ops->sor             = MatSOR_SeqBAIJ_5;
3151       break;
3152     case 6:
3153       B->ops->mult            = MatMult_SeqBAIJ_6;
3154       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3155       B->ops->sor             = MatSOR_SeqBAIJ_6;
3156       break;
3157     case 7:
3158       B->ops->mult            = MatMult_SeqBAIJ_7;
3159       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3160       B->ops->sor             = MatSOR_SeqBAIJ_7;
3161       break;
3162     case 15:
3163       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3164       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3165       B->ops->sor             = MatSOR_SeqBAIJ_N;
3166       break;
3167     default:
3168       B->ops->mult            = MatMult_SeqBAIJ_N;
3169       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3170       B->ops->sor             = MatSOR_SeqBAIJ_N;
3171       break;
3172     }
3173   }
3174   b->mbs       = mbs;
3175   b->nbs       = nbs;
3176   if (!skipallocation) {
3177     if (!b->imax) {
3178       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3179       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3180       b->free_imax_ilen = PETSC_TRUE;
3181     }
3182     /* b->ilen will count nonzeros in each block row so far. */
3183     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3184     if (!nnz) {
3185       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3186       else if (nz < 0) nz = 1;
3187       for (i=0; i<mbs; i++) b->imax[i] = nz;
3188       nz = nz*mbs;
3189     } else {
3190       nz = 0;
3191       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3192     }
3193 
3194     /* allocate the matrix space */
3195     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3196     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3197     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3198     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3199     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3200     b->singlemalloc = PETSC_TRUE;
3201     b->i[0] = 0;
3202     for (i=1; i<mbs+1; i++) {
3203       b->i[i] = b->i[i-1] + b->imax[i-1];
3204     }
3205     b->free_a     = PETSC_TRUE;
3206     b->free_ij    = PETSC_TRUE;
3207   } else {
3208     b->free_a     = PETSC_FALSE;
3209     b->free_ij    = PETSC_FALSE;
3210   }
3211 
3212   b->bs2              = bs2;
3213   b->mbs              = mbs;
3214   b->nz               = 0;
3215   b->maxnz            = nz;
3216   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3217   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3218   PetscFunctionReturn(0);
3219 }
3220 EXTERN_C_END
3221 
3222 EXTERN_C_BEGIN
3223 #undef __FUNCT__
3224 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3225 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3226 {
3227   PetscInt       i,m,nz,nz_max=0,*nnz;
3228   PetscScalar    *values=0;
3229   PetscErrorCode ierr;
3230 
3231   PetscFunctionBegin;
3232   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3233   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3234   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3235   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3236   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3237   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3238   m = B->rmap->n/bs;
3239 
3240   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3241   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3242   for (i=0; i<m; i++) {
3243     nz = ii[i+1]- ii[i];
3244     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3245     nz_max = PetscMax(nz_max, nz);
3246     nnz[i] = nz;
3247   }
3248   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3249   ierr = PetscFree(nnz);CHKERRQ(ierr);
3250 
3251   values = (PetscScalar*)V;
3252   if (!values) {
3253     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3254     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3255   }
3256   for (i=0; i<m; i++) {
3257     PetscInt          ncols  = ii[i+1] - ii[i];
3258     const PetscInt    *icols = jj + ii[i];
3259     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3260     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3261   }
3262   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3263   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3266   PetscFunctionReturn(0);
3267 }
3268 EXTERN_C_END
3269 
3270 
3271 EXTERN_C_BEGIN
3272 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3273 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3274 #if defined(PETSC_HAVE_MUMPS)
3275 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3276 #endif
3277 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3278 EXTERN_C_END
3279 
3280 /*MC
3281    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3282    block sparse compressed row format.
3283 
3284    Options Database Keys:
3285 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3286 
3287   Level: beginner
3288 
3289 .seealso: MatCreateSeqBAIJ()
3290 M*/
3291 
3292 EXTERN_C_BEGIN
3293 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3294 EXTERN_C_END
3295 
3296 EXTERN_C_BEGIN
3297 #undef __FUNCT__
3298 #define __FUNCT__ "MatCreate_SeqBAIJ"
3299 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3300 {
3301   PetscErrorCode ierr;
3302   PetscMPIInt    size;
3303   Mat_SeqBAIJ    *b;
3304 
3305   PetscFunctionBegin;
3306   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3307   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3308 
3309   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3310   B->data = (void*)b;
3311   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3312   b->row                   = 0;
3313   b->col                   = 0;
3314   b->icol                  = 0;
3315   b->reallocs              = 0;
3316   b->saved_values          = 0;
3317 
3318   b->roworiented           = PETSC_TRUE;
3319   b->nonew                 = 0;
3320   b->diag                  = 0;
3321   b->solve_work            = 0;
3322   b->mult_work             = 0;
3323   B->spptr                 = 0;
3324   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3325   b->keepnonzeropattern    = PETSC_FALSE;
3326   b->xtoy                  = 0;
3327   b->XtoY                  = 0;
3328   B->same_nonzero          = PETSC_FALSE;
3329 
3330   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3331                                      "MatGetFactorAvailable_seqbaij_petsc",
3332                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3333   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3334                                      "MatGetFactor_seqbaij_petsc",
3335                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3336   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3337                                      "MatGetFactor_seqbaij_bstrm",
3338                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3339 #if defined(PETSC_HAVE_MUMPS)
3340   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3341 #endif
3342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3343                                      "MatInvertBlockDiagonal_SeqBAIJ",
3344                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3346                                      "MatStoreValues_SeqBAIJ",
3347                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3349                                      "MatRetrieveValues_SeqBAIJ",
3350                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3352                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3353                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3355                                      "MatConvert_SeqBAIJ_SeqAIJ",
3356                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3358                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3359                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3361                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3362                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3364                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3365                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3367                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3368                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3370                                      "MatIsTranspose_SeqBAIJ",
3371                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3372   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3373   PetscFunctionReturn(0);
3374 }
3375 EXTERN_C_END
3376 
3377 #undef __FUNCT__
3378 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3379 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3380 {
3381   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3382   PetscErrorCode ierr;
3383   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3384 
3385   PetscFunctionBegin;
3386   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3387 
3388   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3389     c->imax = a->imax;
3390     c->ilen = a->ilen;
3391     c->free_imax_ilen = PETSC_FALSE;
3392   } else {
3393     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3394     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3395     for (i=0; i<mbs; i++) {
3396       c->imax[i] = a->imax[i];
3397       c->ilen[i] = a->ilen[i];
3398     }
3399     c->free_imax_ilen = PETSC_TRUE;
3400   }
3401 
3402   /* allocate the matrix space */
3403   if (mallocmatspace) {
3404     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3405       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3406       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3407       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3408       c->i            = a->i;
3409       c->j            = a->j;
3410       c->singlemalloc = PETSC_FALSE;
3411       c->free_a       = PETSC_TRUE;
3412       c->free_ij      = PETSC_FALSE;
3413       c->parent       = A;
3414       C->preallocated = PETSC_TRUE;
3415       C->assembled    = PETSC_TRUE;
3416       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3417       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3418       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3419     } else {
3420       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3421       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3422       c->singlemalloc = PETSC_TRUE;
3423       c->free_a       = PETSC_TRUE;
3424       c->free_ij      = PETSC_TRUE;
3425       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3426       if (mbs > 0) {
3427         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3428         if (cpvalues == MAT_COPY_VALUES) {
3429           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3430         } else {
3431           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3432         }
3433       }
3434       C->preallocated = PETSC_TRUE;
3435       C->assembled    = PETSC_TRUE;
3436     }
3437   }
3438 
3439   c->roworiented = a->roworiented;
3440   c->nonew       = a->nonew;
3441   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3442   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3443   c->bs2         = a->bs2;
3444   c->mbs         = a->mbs;
3445   c->nbs         = a->nbs;
3446 
3447   if (a->diag) {
3448     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3449       c->diag      = a->diag;
3450       c->free_diag = PETSC_FALSE;
3451     } else {
3452       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3453       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3454       for (i=0; i<mbs; i++) {
3455         c->diag[i] = a->diag[i];
3456       }
3457       c->free_diag = PETSC_TRUE;
3458     }
3459   } else c->diag        = 0;
3460   c->nz                 = a->nz;
3461   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3462   c->solve_work         = 0;
3463   c->mult_work          = 0;
3464 
3465   c->compressedrow.use     = a->compressedrow.use;
3466   c->compressedrow.nrows   = a->compressedrow.nrows;
3467   c->compressedrow.check   = a->compressedrow.check;
3468   if (a->compressedrow.use) {
3469     i = a->compressedrow.nrows;
3470     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3471     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3472     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3473     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3474   } else {
3475     c->compressedrow.use    = PETSC_FALSE;
3476     c->compressedrow.i      = PETSC_NULL;
3477     c->compressedrow.rindex = PETSC_NULL;
3478   }
3479   C->same_nonzero = A->same_nonzero;
3480   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3481   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3482   PetscFunctionReturn(0);
3483 }
3484 
3485 #undef __FUNCT__
3486 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3487 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3488 {
3489   PetscErrorCode ierr;
3490 
3491   PetscFunctionBegin;
3492   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3493   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3494   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3495   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3496   PetscFunctionReturn(0);
3497 }
3498 
3499 #undef __FUNCT__
3500 #define __FUNCT__ "MatLoad_SeqBAIJ"
3501 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3502 {
3503   Mat_SeqBAIJ    *a;
3504   PetscErrorCode ierr;
3505   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3506   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3507   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3508   PetscInt       *masked,nmask,tmp,bs2,ishift;
3509   PetscMPIInt    size;
3510   int            fd;
3511   PetscScalar    *aa;
3512   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3513 
3514   PetscFunctionBegin;
3515   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3516   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3517   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3518   bs2  = bs*bs;
3519 
3520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3521   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3522   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3523   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3524   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3525   M = header[1]; N = header[2]; nz = header[3];
3526 
3527   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3528   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3529 
3530   /*
3531      This code adds extra rows to make sure the number of rows is
3532     divisible by the blocksize
3533   */
3534   mbs        = M/bs;
3535   extra_rows = bs - M + bs*(mbs);
3536   if (extra_rows == bs) extra_rows = 0;
3537   else                  mbs++;
3538   if (extra_rows) {
3539     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3540   }
3541 
3542   /* Set global sizes if not already set */
3543   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3544     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3545   } else { /* Check if the matrix global sizes are correct */
3546     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3547     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3548       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3549     }
3550     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3551   }
3552 
3553   /* read in row lengths */
3554   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3555   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3556   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3557 
3558   /* read in column indices */
3559   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3560   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3561   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3562 
3563   /* loop over row lengths determining block row lengths */
3564   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3565   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3566   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3567   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3568   rowcount = 0;
3569   nzcount = 0;
3570   for (i=0; i<mbs; i++) {
3571     nmask = 0;
3572     for (j=0; j<bs; j++) {
3573       kmax = rowlengths[rowcount];
3574       for (k=0; k<kmax; k++) {
3575         tmp = jj[nzcount++]/bs;
3576         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3577       }
3578       rowcount++;
3579     }
3580     browlengths[i] += nmask;
3581     /* zero out the mask elements we set */
3582     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3583   }
3584 
3585   /* Do preallocation  */
3586   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3587   a = (Mat_SeqBAIJ*)newmat->data;
3588 
3589   /* set matrix "i" values */
3590   a->i[0] = 0;
3591   for (i=1; i<= mbs; i++) {
3592     a->i[i]      = a->i[i-1] + browlengths[i-1];
3593     a->ilen[i-1] = browlengths[i-1];
3594   }
3595   a->nz         = 0;
3596   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3597 
3598   /* read in nonzero values */
3599   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3600   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3601   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3602 
3603   /* set "a" and "j" values into matrix */
3604   nzcount = 0; jcount = 0;
3605   for (i=0; i<mbs; i++) {
3606     nzcountb = nzcount;
3607     nmask    = 0;
3608     for (j=0; j<bs; j++) {
3609       kmax = rowlengths[i*bs+j];
3610       for (k=0; k<kmax; k++) {
3611         tmp = jj[nzcount++]/bs;
3612         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3613       }
3614     }
3615     /* sort the masked values */
3616     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3617 
3618     /* set "j" values into matrix */
3619     maskcount = 1;
3620     for (j=0; j<nmask; j++) {
3621       a->j[jcount++]  = masked[j];
3622       mask[masked[j]] = maskcount++;
3623     }
3624     /* set "a" values into matrix */
3625     ishift = bs2*a->i[i];
3626     for (j=0; j<bs; j++) {
3627       kmax = rowlengths[i*bs+j];
3628       for (k=0; k<kmax; k++) {
3629         tmp       = jj[nzcountb]/bs ;
3630         block     = mask[tmp] - 1;
3631         point     = jj[nzcountb] - bs*tmp;
3632         idx       = ishift + bs2*block + j + bs*point;
3633         a->a[idx] = (MatScalar)aa[nzcountb++];
3634       }
3635     }
3636     /* zero out the mask elements we set */
3637     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3638   }
3639   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3640 
3641   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3642   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3643   ierr = PetscFree(aa);CHKERRQ(ierr);
3644   ierr = PetscFree(jj);CHKERRQ(ierr);
3645   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3646 
3647   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3648   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 #undef __FUNCT__
3653 #define __FUNCT__ "MatCreateSeqBAIJ"
3654 /*@C
3655    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3656    compressed row) format.  For good matrix assembly performance the
3657    user should preallocate the matrix storage by setting the parameter nz
3658    (or the array nnz).  By setting these parameters accurately, performance
3659    during matrix assembly can be increased by more than a factor of 50.
3660 
3661    Collective on MPI_Comm
3662 
3663    Input Parameters:
3664 +  comm - MPI communicator, set to PETSC_COMM_SELF
3665 .  bs - size of block
3666 .  m - number of rows
3667 .  n - number of columns
3668 .  nz - number of nonzero blocks  per block row (same for all rows)
3669 -  nnz - array containing the number of nonzero blocks in the various block rows
3670          (possibly different for each block row) or PETSC_NULL
3671 
3672    Output Parameter:
3673 .  A - the matrix
3674 
3675    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3676    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3677    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3678 
3679    Options Database Keys:
3680 .   -mat_no_unroll - uses code that does not unroll the loops in the
3681                      block calculations (much slower)
3682 .    -mat_block_size - size of the blocks to use
3683 
3684    Level: intermediate
3685 
3686    Notes:
3687    The number of rows and columns must be divisible by blocksize.
3688 
3689    If the nnz parameter is given then the nz parameter is ignored
3690 
3691    A nonzero block is any block that as 1 or more nonzeros in it
3692 
3693    The block AIJ format is fully compatible with standard Fortran 77
3694    storage.  That is, the stored row and column indices can begin at
3695    either one (as in Fortran) or zero.  See the users' manual for details.
3696 
3697    Specify the preallocated storage with either nz or nnz (not both).
3698    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3699    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3700    matrices.
3701 
3702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3703 @*/
3704 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3705 {
3706   PetscErrorCode ierr;
3707 
3708   PetscFunctionBegin;
3709   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3710   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3711   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3712   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3713   PetscFunctionReturn(0);
3714 }
3715 
3716 #undef __FUNCT__
3717 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3718 /*@C
3719    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3720    per row in the matrix. For good matrix assembly performance the
3721    user should preallocate the matrix storage by setting the parameter nz
3722    (or the array nnz).  By setting these parameters accurately, performance
3723    during matrix assembly can be increased by more than a factor of 50.
3724 
3725    Collective on MPI_Comm
3726 
3727    Input Parameters:
3728 +  A - the matrix
3729 .  bs - size of block
3730 .  nz - number of block nonzeros per block row (same for all rows)
3731 -  nnz - array containing the number of block nonzeros in the various block rows
3732          (possibly different for each block row) or PETSC_NULL
3733 
3734    Options Database Keys:
3735 .   -mat_no_unroll - uses code that does not unroll the loops in the
3736                      block calculations (much slower)
3737 .    -mat_block_size - size of the blocks to use
3738 
3739    Level: intermediate
3740 
3741    Notes:
3742    If the nnz parameter is given then the nz parameter is ignored
3743 
3744    You can call MatGetInfo() to get information on how effective the preallocation was;
3745    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3746    You can also run with the option -info and look for messages with the string
3747    malloc in them to see if additional memory allocation was needed.
3748 
3749    The block AIJ format is fully compatible with standard Fortran 77
3750    storage.  That is, the stored row and column indices can begin at
3751    either one (as in Fortran) or zero.  See the users' manual for details.
3752 
3753    Specify the preallocated storage with either nz or nnz (not both).
3754    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3755    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3756 
3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3758 @*/
3759 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3760 {
3761   PetscErrorCode ierr;
3762 
3763   PetscFunctionBegin;
3764   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3765   PetscValidType(B,1);
3766   PetscValidLogicalCollectiveInt(B,bs,2);
3767   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3773 /*@C
3774    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3775    (the default sequential PETSc format).
3776 
3777    Collective on MPI_Comm
3778 
3779    Input Parameters:
3780 +  A - the matrix
3781 .  i - the indices into j for the start of each local row (starts with zero)
3782 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3783 -  v - optional values in the matrix
3784 
3785    Level: developer
3786 
3787 .keywords: matrix, aij, compressed row, sparse
3788 
3789 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3790 @*/
3791 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3792 {
3793   PetscErrorCode ierr;
3794 
3795   PetscFunctionBegin;
3796   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3797   PetscValidType(B,1);
3798   PetscValidLogicalCollectiveInt(B,bs,2);
3799   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3800   PetscFunctionReturn(0);
3801 }
3802 
3803 
3804 #undef __FUNCT__
3805 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3806 /*@
3807      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3808 
3809      Collective on MPI_Comm
3810 
3811    Input Parameters:
3812 +  comm - must be an MPI communicator of size 1
3813 .  bs - size of block
3814 .  m - number of rows
3815 .  n - number of columns
3816 .  i - row indices
3817 .  j - column indices
3818 -  a - matrix values
3819 
3820    Output Parameter:
3821 .  mat - the matrix
3822 
3823    Level: advanced
3824 
3825    Notes:
3826        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3827     once the matrix is destroyed
3828 
3829        You cannot set new nonzero locations into this matrix, that will generate an error.
3830 
3831        The i and j indices are 0 based
3832 
3833        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3834 
3835 
3836 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3837 
3838 @*/
3839 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3840 {
3841   PetscErrorCode ierr;
3842   PetscInt       ii;
3843   Mat_SeqBAIJ    *baij;
3844 
3845   PetscFunctionBegin;
3846   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3847   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3848 
3849   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3850   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3851   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3852   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3853   baij = (Mat_SeqBAIJ*)(*mat)->data;
3854   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3855   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3856 
3857   baij->i = i;
3858   baij->j = j;
3859   baij->a = a;
3860   baij->singlemalloc = PETSC_FALSE;
3861   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3862   baij->free_a       = PETSC_FALSE;
3863   baij->free_ij      = PETSC_FALSE;
3864 
3865   for (ii=0; ii<m; ii++) {
3866     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3867 #if defined(PETSC_USE_DEBUG)
3868     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3869 #endif
3870   }
3871 #if defined(PETSC_USE_DEBUG)
3872   for (ii=0; ii<baij->i[m]; ii++) {
3873     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3874     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3875   }
3876 #endif
3877 
3878   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3880   PetscFunctionReturn(0);
3881 }
3882