xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d54338ec4fdf48b56b5efdd9bf04ca9d50d6ab04)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52         ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53         diag    += 4;
54         mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66         ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67         diag    += 9;
68         mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76         ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77         diag  += 16;
78         mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86         ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87         diag  += 25;
88         mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97         diag  += 36;
98         mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107         diag  += 49;
108         mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118         diag  += bs2;
119         mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253         v     = aa + 4*ai[i];
254         vi    = aj + ai[i];
255         nz    = diag[i] - ai[i];
256         s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258           idx  = 2*vi[j];
259           it   = 4*j;
260           x1   = x[idx]; x2 = x[1+idx];
261           s1  -= v[it]*x1 + v[it+2]*x2;
262           s2  -= v[it+1]*x1 + v[it+3]*x2;
263         }
264         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296         v     = aa + 4*(diag[i]+1);
297         vi    = aj + diag[i] + 1;
298         nz    = ai[i+1] - diag[i] - 1;
299         s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301           idx  = 2*vi[j];
302           it   = 4*j;
303           x1   = x[idx]; x2 = x[1+idx];
304           s1  -= v[it]*x1 + v[it+2]*x2;
305           s2  -= v[it+1]*x1 + v[it+3]*x2;
306         }
307         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470         v     = aa + 16*ai[i];
471         vi    = aj + ai[i];
472         nz    = diag[i] - ai[i];
473         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474         while (nz--) {
475           idx  = 4*(*vi++);
476           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481           v   += 16;
482         }
483         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521         v     = aa + 16*(diag[i]+1);
522         vi    = aj + diag[i] + 1;
523         nz    = ai[i+1] - diag[i] - 1;
524         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525         while (nz--) {
526           idx  = 4*(*vi++);
527           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532           v   += 16;
533         }
534         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589         v     = aa + 25*ai[i];
590         vi    = aj + ai[i];
591         nz    = diag[i] - ai[i];
592         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593         while (nz--) {
594           idx  = 5*(*vi++);
595           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601           v   += 25;
602         }
603         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644         v     = aa + 25*(diag[i]+1);
645         vi    = aj + diag[i] + 1;
646         nz    = ai[i+1] - diag[i] - 1;
647         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648         while (nz--) {
649           idx  = 5*(*vi++);
650           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656           v   += 25;
657         }
658         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999         /* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014           } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063         /* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078           } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol)  low = 0;
1132       else                high = nrow;
1133       lastcol = col;
1134       value = v + k*(stepval+4 + l)*4;
1135       while (high-low > 7) {
1136         t = (low+high)/2;
1137         if (rp[t] > col) high = t;
1138         else             low  = t;
1139       }
1140       for (i=low; i<high; i++) {
1141         if (rp[i] > col) break;
1142         if (rp[i] == col) {
1143           bap  = ap +  16*i;
1144           for (ii=0; ii<4; ii++,value+=stepval) {
1145             for (jj=ii; jj<16; jj+=4) {
1146               bap[jj] += *value++;
1147             }
1148           }
1149           goto noinsert2;
1150         }
1151       }
1152       N = nrow++ - 1;
1153       high++; /* added new column index thus must search to one higher than before */
1154       /* shift up all the later entries in this row */
1155       for (ii=N; ii>=i; ii--) {
1156         rp[ii+1] = rp[ii];
1157         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1158       }
1159       if (N >= i) {
1160         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1161       }
1162       rp[i] = col;
1163       bap   = ap +  16*i;
1164       for (ii=0; ii<4; ii++,value+=stepval) {
1165         for (jj=ii; jj<16; jj+=4) {
1166           bap[jj] = *value++;
1167         }
1168       }
1169       noinsert2:;
1170       low = i;
1171     }
1172     ailen[row] = nrow;
1173   }
1174   PetscFunctionReturnVoid();
1175 }
1176 EXTERN_C_END
1177 
1178 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1179 #define matsetvalues4_ MATSETVALUES4
1180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1181 #define matsetvalues4_ matsetvalues4
1182 #endif
1183 
1184 EXTERN_C_BEGIN
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatSetValues4_"
1187 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1188 {
1189   Mat         A = *AA;
1190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1191   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1192   PetscInt    *ai=a->i,*ailen=a->ilen;
1193   PetscInt    *aj=a->j,brow,bcol;
1194   PetscInt    ridx,cidx,lastcol = -1;
1195   MatScalar   *ap,value,*aa=a->a,*bap;
1196 
1197   PetscFunctionBegin;
1198   for (k=0; k<m; k++) { /* loop over added rows */
1199     row  = im[k]; brow = row/4;
1200     rp   = aj + ai[brow];
1201     ap   = aa + 16*ai[brow];
1202     nrow = ailen[brow];
1203     low  = 0;
1204     high = nrow;
1205     for (l=0; l<n; l++) { /* loop over added columns */
1206       col = in[l]; bcol = col/4;
1207       ridx = row % 4; cidx = col % 4;
1208       value = v[l + k*n];
1209       if (col <= lastcol)  low = 0;
1210       else                high = nrow;
1211       lastcol = col;
1212       while (high-low > 7) {
1213         t = (low+high)/2;
1214         if (rp[t] > bcol) high = t;
1215         else              low  = t;
1216       }
1217       for (i=low; i<high; i++) {
1218         if (rp[i] > bcol) break;
1219         if (rp[i] == bcol) {
1220           bap  = ap +  16*i + 4*cidx + ridx;
1221           *bap += value;
1222           goto noinsert1;
1223         }
1224       }
1225       N = nrow++ - 1;
1226       high++; /* added new column thus must search to one higher than before */
1227       /* shift up all the later entries in this row */
1228       for (ii=N; ii>=i; ii--) {
1229         rp[ii+1] = rp[ii];
1230         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1231       }
1232       if (N>=i) {
1233         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1234       }
1235       rp[i]                    = bcol;
1236       ap[16*i + 4*cidx + ridx] = value;
1237       noinsert1:;
1238       low = i;
1239     }
1240     ailen[brow] = nrow;
1241   }
1242   PetscFunctionReturnVoid();
1243 }
1244 EXTERN_C_END
1245 
1246 /*
1247      Checks for missing diagonals
1248 */
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1251 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1252 {
1253   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1254   PetscErrorCode ierr;
1255   PetscInt       *diag,*jj = a->j,i;
1256 
1257   PetscFunctionBegin;
1258   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1259   *missing = PETSC_FALSE;
1260   if (A->rmap->n > 0 && !jj) {
1261     *missing  = PETSC_TRUE;
1262     if (d) *d = 0;
1263     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1264   } else {
1265     diag     = a->diag;
1266     for (i=0; i<a->mbs; i++) {
1267       if (jj[diag[i]] != i) {
1268         *missing  = PETSC_TRUE;
1269         if (d) *d = i;
1270         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1271         break;
1272       }
1273     }
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1280 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1281 {
1282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284   PetscInt       i,j,m = a->mbs;
1285 
1286   PetscFunctionBegin;
1287   if (!a->diag) {
1288     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1289     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1290     a->free_diag = PETSC_TRUE;
1291   }
1292   for (i=0; i<m; i++) {
1293     a->diag[i] = a->i[i+1];
1294     for (j=a->i[i]; j<a->i[i+1]; j++) {
1295       if (a->j[j] == i) {
1296         a->diag[i] = j;
1297         break;
1298       }
1299     }
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (inja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353             }
1354           }
1355         }
1356       }
1357     }
1358 
1359     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1360       ierr = PetscFree(tia);CHKERRQ(ierr);
1361       ierr = PetscFree(tja);CHKERRQ(ierr);
1362     }
1363   } else if (oshift == 1) {
1364     if (symmetric) {
1365       nz = tia[A->rmap->n/bs];
1366       /*  add 1 to i and j indices */
1367       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1368       *ia = tia;
1369       if (ja) {
1370         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1371         *ja = tja;
1372       }
1373     } else {
1374       nz = a->i[A->rmap->n/bs];
1375       /* malloc space and  add 1 to i and j indices */
1376       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1377       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1378       if (ja) {
1379         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1380         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1381       }
1382     }
1383   } else {
1384     *ia = tia;
1385     if (ja) *ja = tja;
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1392 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1393 {
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   if (!ia) PetscFunctionReturn(0);
1398   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1399     ierr = PetscFree(*ia);CHKERRQ(ierr);
1400     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1407 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1408 {
1409   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413 #if defined(PETSC_USE_LOG)
1414   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1415 #endif
1416   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1417   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1418   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1419   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1420   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1421   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1422   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1423   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1424   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1425   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1426   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1427   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1428   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1429 
1430   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1431   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1432   ierr = PetscFree(A->data);CHKERRQ(ierr);
1433 
1434   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1450 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1451 {
1452   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1453   PetscErrorCode ierr;
1454 
1455   PetscFunctionBegin;
1456   switch (op) {
1457   case MAT_ROW_ORIENTED:
1458     a->roworiented    = flg;
1459     break;
1460   case MAT_KEEP_NONZERO_PATTERN:
1461     a->keepnonzeropattern = flg;
1462     break;
1463   case MAT_NEW_NONZERO_LOCATIONS:
1464     a->nonew          = (flg ? 0 : 1);
1465     break;
1466   case MAT_NEW_NONZERO_LOCATION_ERR:
1467     a->nonew          = (flg ? -1 : 0);
1468     break;
1469   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1470     a->nonew          = (flg ? -2 : 0);
1471     break;
1472   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1473     a->nounused       = (flg ? -1 : 0);
1474     break;
1475   case MAT_CHECK_COMPRESSED_ROW:
1476     a->compressedrow.check = flg;
1477     break;
1478   case MAT_NEW_DIAGONALS:
1479   case MAT_IGNORE_OFF_PROC_ENTRIES:
1480   case MAT_USE_HASH_TABLE:
1481     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1482     break;
1483   case MAT_SPD:
1484   case MAT_SYMMETRIC:
1485   case MAT_STRUCTURALLY_SYMMETRIC:
1486   case MAT_HERMITIAN:
1487   case MAT_SYMMETRY_ETERNAL:
1488     /* These options are handled directly by MatSetOption() */
1489     break;
1490   default:
1491     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1498 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1499 {
1500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1501   PetscErrorCode ierr;
1502   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1503   MatScalar      *aa,*aa_i;
1504   PetscScalar    *v_i;
1505 
1506   PetscFunctionBegin;
1507   bs  = A->rmap->bs;
1508   ai  = a->i;
1509   aj  = a->j;
1510   aa  = a->a;
1511   bs2 = a->bs2;
1512 
1513   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1514 
1515   bn  = row/bs;   /* Block number */
1516   bp  = row % bs; /* Block Position */
1517   M   = ai[bn+1] - ai[bn];
1518   *nz = bs*M;
1519 
1520   if (v) {
1521     *v = 0;
1522     if (*nz) {
1523       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1524       for (i=0; i<M; i++) { /* for each block in the block row */
1525         v_i  = *v + i*bs;
1526         aa_i = aa + bs2*(ai[bn] + i);
1527         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1528       }
1529     }
1530   }
1531 
1532   if (idx) {
1533     *idx = 0;
1534     if (*nz) {
1535       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1536       for (i=0; i<M; i++) { /* for each block in the block row */
1537         idx_i = *idx + i*bs;
1538         itmp  = bs*aj[ai[bn] + i];
1539         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1540       }
1541     }
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1548 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1554   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1562 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1563 {
1564   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1565   Mat            C;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1568   PetscInt       *rows,*cols,bs2=a->bs2;
1569   MatScalar      *array;
1570 
1571   PetscFunctionBegin;
1572   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1573   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1574     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1575     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1576 
1577     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1578     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1579     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1580     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1581     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1582     ierr = PetscFree(col);CHKERRQ(ierr);
1583   } else {
1584     C = *B;
1585   }
1586 
1587   array = a->a;
1588   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1589   for (i=0; i<mbs; i++) {
1590     cols[0] = i*bs;
1591     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1592     len = ai[i+1] - ai[i];
1593     for (j=0; j<len; j++) {
1594       rows[0] = (*aj++)*bs;
1595       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1596       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1597       array += bs2;
1598     }
1599   }
1600   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1601 
1602   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1603   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1604 
1605   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1606     *B = C;
1607   } else {
1608     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 EXTERN_C_BEGIN
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1616 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1617 {
1618   PetscErrorCode ierr;
1619   Mat            Btrans;
1620 
1621   PetscFunctionBegin;
1622   *f = PETSC_FALSE;
1623   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1624   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1625   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 EXTERN_C_END
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1632 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1633 {
1634   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1635   PetscErrorCode ierr;
1636   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1637   int            fd;
1638   PetscScalar    *aa;
1639   FILE           *file;
1640 
1641   PetscFunctionBegin;
1642   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1643   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1644   col_lens[0] = MAT_FILE_CLASSID;
1645 
1646   col_lens[1] = A->rmap->N;
1647   col_lens[2] = A->cmap->n;
1648   col_lens[3] = a->nz*bs2;
1649 
1650   /* store lengths of each row and write (including header) to file */
1651   count = 0;
1652   for (i=0; i<a->mbs; i++) {
1653     for (j=0; j<bs; j++) {
1654       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1655     }
1656   }
1657   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1658   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1659 
1660   /* store column indices (zero start index) */
1661   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       for (k=a->i[i]; k<a->i[i+1]; k++) {
1666         for (l=0; l<bs; l++) {
1667           jj[count++] = bs*a->j[k] + l;
1668         }
1669       }
1670     }
1671   }
1672   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1673   ierr = PetscFree(jj);CHKERRQ(ierr);
1674 
1675   /* store nonzero values */
1676   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1677   count = 0;
1678   for (i=0; i<a->mbs; i++) {
1679     for (j=0; j<bs; j++) {
1680       for (k=a->i[i]; k<a->i[i+1]; k++) {
1681         for (l=0; l<bs; l++) {
1682           aa[count++] = a->a[bs2*k + l*bs + j];
1683         }
1684       }
1685     }
1686   }
1687   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1688   ierr = PetscFree(aa);CHKERRQ(ierr);
1689 
1690   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1691   if (file) {
1692     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1699 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1700 {
1701   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1702   PetscErrorCode    ierr;
1703   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1704   PetscViewerFormat format;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1708   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1709     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1710   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1711     Mat aij;
1712     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1713     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1714     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1715   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1716      PetscFunctionReturn(0);
1717   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1718     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1719     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1720     for (i=0; i<a->mbs; i++) {
1721       for (j=0; j<bs; j++) {
1722         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1723         for (k=a->i[i]; k<a->i[i+1]; k++) {
1724           for (l=0; l<bs; l++) {
1725 #if defined(PETSC_USE_COMPLEX)
1726             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1727               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1728                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1729             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1731                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             }
1735 #else
1736             if (a->a[bs2*k + l*bs + j] != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1738             }
1739 #endif
1740           }
1741         }
1742         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1743       }
1744     }
1745     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1746   } else {
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1748     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1749     for (i=0; i<a->mbs; i++) {
1750       for (j=0; j<bs; j++) {
1751         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1752         for (k=a->i[i]; k<a->i[i+1]; k++) {
1753           for (l=0; l<bs; l++) {
1754 #if defined(PETSC_USE_COMPLEX)
1755             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1756               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1757                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1758             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1760                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             } else {
1762               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             }
1764 #else
1765             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1766 #endif
1767           }
1768         }
1769         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1770       }
1771     }
1772     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1773   }
1774   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1780 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1781 {
1782   Mat            A = (Mat) Aa;
1783   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1784   PetscErrorCode ierr;
1785   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1786   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1787   MatScalar      *aa;
1788   PetscViewer    viewer;
1789   PetscViewerFormat format;
1790 
1791   PetscFunctionBegin;
1792   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1793   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1794 
1795   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1796 
1797   /* loop over matrix elements drawing boxes */
1798 
1799   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1800     color = PETSC_DRAW_BLUE;
1801     for (i=0,row=0; i<mbs; i++,row+=bs) {
1802       for (j=a->i[i]; j<a->i[i+1]; j++) {
1803         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1804         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1805         aa = a->a + j*bs2;
1806         for (k=0; k<bs; k++) {
1807           for (l=0; l<bs; l++) {
1808             if (PetscRealPart(*aa++) >=  0.) continue;
1809             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1810           }
1811         }
1812       }
1813     }
1814     color = PETSC_DRAW_CYAN;
1815     for (i=0,row=0; i<mbs; i++,row+=bs) {
1816       for (j=a->i[i]; j<a->i[i+1]; j++) {
1817         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1818         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1819         aa = a->a + j*bs2;
1820         for (k=0; k<bs; k++) {
1821           for (l=0; l<bs; l++) {
1822             if (PetscRealPart(*aa++) != 0.) continue;
1823             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1824           }
1825         }
1826       }
1827     }
1828     color = PETSC_DRAW_RED;
1829     for (i=0,row=0; i<mbs; i++,row+=bs) {
1830       for (j=a->i[i]; j<a->i[i+1]; j++) {
1831         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1832         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1833         aa = a->a + j*bs2;
1834         for (k=0; k<bs; k++) {
1835           for (l=0; l<bs; l++) {
1836             if (PetscRealPart(*aa++) <= 0.) continue;
1837             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1838           }
1839         }
1840       }
1841     }
1842   } else {
1843     /* use contour shading to indicate magnitude of values */
1844     /* first determine max of all nonzero values */
1845     PetscDraw   popup;
1846     PetscReal scale,maxv = 0.0;
1847 
1848     for (i=0; i<a->nz*a->bs2; i++) {
1849       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1850     }
1851     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1852     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1853     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1854     for (i=0,row=0; i<mbs; i++,row+=bs) {
1855       for (j=a->i[i]; j<a->i[i+1]; j++) {
1856         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1857         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1858         aa = a->a + j*bs2;
1859         for (k=0; k<bs; k++) {
1860           for (l=0; l<bs; l++) {
1861             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1862             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1863           }
1864         }
1865       }
1866     }
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1873 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1874 {
1875   PetscErrorCode ierr;
1876   PetscReal      xl,yl,xr,yr,w,h;
1877   PetscDraw      draw;
1878   PetscBool      isnull;
1879 
1880   PetscFunctionBegin;
1881   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1882   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1883 
1884   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1885   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1886   xr += w;    yr += h;  xl = -w;     yl = -h;
1887   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1888   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1889   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatView_SeqBAIJ"
1895 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1896 {
1897   PetscErrorCode ierr;
1898   PetscBool      iascii,isbinary,isdraw;
1899 
1900   PetscFunctionBegin;
1901   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1902   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1903   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1904   if (iascii) {
1905     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1906   } else if (isbinary) {
1907     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1908   } else if (isdraw) {
1909     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1910   } else {
1911     Mat B;
1912     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1913     ierr = MatView(B,viewer);CHKERRQ(ierr);
1914     ierr = MatDestroy(&B);CHKERRQ(ierr);
1915   }
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1922 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1923 {
1924   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1925   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1926   PetscInt    *ai = a->i,*ailen = a->ilen;
1927   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1928   MatScalar   *ap,*aa = a->a;
1929 
1930   PetscFunctionBegin;
1931   for (k=0; k<m; k++) { /* loop over rows */
1932     row  = im[k]; brow = row/bs;
1933     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1934     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1935     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1936     nrow = ailen[brow];
1937     for (l=0; l<n; l++) { /* loop over columns */
1938       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1939       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1940       col  = in[l] ;
1941       bcol = col/bs;
1942       cidx = col%bs;
1943       ridx = row%bs;
1944       high = nrow;
1945       low  = 0; /* assume unsorted */
1946       while (high-low > 5) {
1947         t = (low+high)/2;
1948         if (rp[t] > bcol) high = t;
1949         else             low  = t;
1950       }
1951       for (i=low; i<high; i++) {
1952         if (rp[i] > bcol) break;
1953         if (rp[i] == bcol) {
1954           *v++ = ap[bs2*i+bs*cidx+ridx];
1955           goto finished;
1956         }
1957       }
1958       *v++ = 0.0;
1959       finished:;
1960     }
1961   }
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1967 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1968 {
1969   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1970   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1971   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1972   PetscErrorCode    ierr;
1973   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1974   PetscBool         roworiented=a->roworiented;
1975   const PetscScalar *value = v;
1976   MatScalar         *ap,*aa = a->a,*bap;
1977 
1978   PetscFunctionBegin;
1979   if (roworiented) {
1980     stepval = (n-1)*bs;
1981   } else {
1982     stepval = (m-1)*bs;
1983   }
1984   for (k=0; k<m; k++) { /* loop over added rows */
1985     row  = im[k];
1986     if (row < 0) continue;
1987 #if defined(PETSC_USE_DEBUG)
1988     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1989 #endif
1990     rp   = aj + ai[row];
1991     ap   = aa + bs2*ai[row];
1992     rmax = imax[row];
1993     nrow = ailen[row];
1994     low  = 0;
1995     high = nrow;
1996     for (l=0; l<n; l++) { /* loop over added columns */
1997       if (in[l] < 0) continue;
1998 #if defined(PETSC_USE_DEBUG)
1999       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2000 #endif
2001       col = in[l];
2002       if (roworiented) {
2003         value = v + (k*(stepval+bs) + l)*bs;
2004       } else {
2005         value = v + (l*(stepval+bs) + k)*bs;
2006       }
2007       if (col <= lastcol) low = 0; else high = nrow;
2008       lastcol = col;
2009       while (high-low > 7) {
2010         t = (low+high)/2;
2011         if (rp[t] > col) high = t;
2012         else             low  = t;
2013       }
2014       for (i=low; i<high; i++) {
2015         if (rp[i] > col) break;
2016         if (rp[i] == col) {
2017           bap  = ap +  bs2*i;
2018           if (roworiented) {
2019             if (is == ADD_VALUES) {
2020               for (ii=0; ii<bs; ii++,value+=stepval) {
2021                 for (jj=ii; jj<bs2; jj+=bs) {
2022                   bap[jj] += *value++;
2023                 }
2024               }
2025             } else {
2026               for (ii=0; ii<bs; ii++,value+=stepval) {
2027                 for (jj=ii; jj<bs2; jj+=bs) {
2028                   bap[jj] = *value++;
2029                 }
2030               }
2031             }
2032           } else {
2033             if (is == ADD_VALUES) {
2034               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2035                 for (jj=0; jj<bs; jj++) {
2036                   bap[jj] += value[jj];
2037                 }
2038                 bap += bs;
2039               }
2040             } else {
2041               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2042                 for (jj=0; jj<bs; jj++) {
2043                   bap[jj]  = value[jj];
2044                 }
2045                 bap += bs;
2046               }
2047             }
2048           }
2049           goto noinsert2;
2050         }
2051       }
2052       if (nonew == 1) goto noinsert2;
2053       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2054       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2055       N = nrow++ - 1; high++;
2056       /* shift up all the later entries in this row */
2057       for (ii=N; ii>=i; ii--) {
2058         rp[ii+1] = rp[ii];
2059         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2060       }
2061       if (N >= i) {
2062         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2063       }
2064       rp[i] = col;
2065       bap   = ap +  bs2*i;
2066       if (roworiented) {
2067         for (ii=0; ii<bs; ii++,value+=stepval) {
2068           for (jj=ii; jj<bs2; jj+=bs) {
2069             bap[jj] = *value++;
2070           }
2071         }
2072       } else {
2073         for (ii=0; ii<bs; ii++,value+=stepval) {
2074           for (jj=0; jj<bs; jj++) {
2075             *bap++  = *value++;
2076           }
2077         }
2078       }
2079       noinsert2:;
2080       low = i;
2081     }
2082     ailen[row] = nrow;
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2089 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2090 {
2091   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2092   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2093   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2094   PetscErrorCode ierr;
2095   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2096   MatScalar      *aa = a->a,*ap;
2097   PetscReal      ratio=0.6;
2098 
2099   PetscFunctionBegin;
2100   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2101 
2102   if (m) rmax = ailen[0];
2103   for (i=1; i<mbs; i++) {
2104     /* move each row back by the amount of empty slots (fshift) before it*/
2105     fshift += imax[i-1] - ailen[i-1];
2106     rmax   = PetscMax(rmax,ailen[i]);
2107     if (fshift) {
2108       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2109       N = ailen[i];
2110       for (j=0; j<N; j++) {
2111         ip[j-fshift] = ip[j];
2112         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2113       }
2114     }
2115     ai[i] = ai[i-1] + ailen[i-1];
2116   }
2117   if (mbs) {
2118     fshift += imax[mbs-1] - ailen[mbs-1];
2119     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2120   }
2121   /* reset ilen and imax for each row */
2122   for (i=0; i<mbs; i++) {
2123     ailen[i] = imax[i] = ai[i+1] - ai[i];
2124   }
2125   a->nz = ai[mbs];
2126 
2127   /* diagonals may have moved, so kill the diagonal pointers */
2128   a->idiagvalid = PETSC_FALSE;
2129   if (fshift && a->diag) {
2130     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2131     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2132     a->diag = 0;
2133   }
2134   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2135   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2136   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2137   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2138   A->info.mallocs     += a->reallocs;
2139   a->reallocs          = 0;
2140   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2141 
2142   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2143   A->same_nonzero = PETSC_TRUE;
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 /*
2148    This function returns an array of flags which indicate the locations of contiguous
2149    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2150    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2151    Assume: sizes should be long enough to hold all the values.
2152 */
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2155 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2156 {
2157   PetscInt   i,j,k,row;
2158   PetscBool  flg;
2159 
2160   PetscFunctionBegin;
2161   for (i=0,j=0; i<n; j++) {
2162     row = idx[i];
2163     if (row%bs!=0) { /* Not the begining of a block */
2164       sizes[j] = 1;
2165       i++;
2166     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2167       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2168       i++;
2169     } else { /* Begining of the block, so check if the complete block exists */
2170       flg = PETSC_TRUE;
2171       for (k=1; k<bs; k++) {
2172         if (row+k != idx[i+k]) { /* break in the block */
2173           flg = PETSC_FALSE;
2174           break;
2175         }
2176       }
2177       if (flg) { /* No break in the bs */
2178         sizes[j] = bs;
2179         i+= bs;
2180       } else {
2181         sizes[j] = 1;
2182         i++;
2183       }
2184     }
2185   }
2186   *bs_max = j;
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2192 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2193 {
2194   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2195   PetscErrorCode    ierr;
2196   PetscInt          i,j,k,count,*rows;
2197   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2198   PetscScalar       zero = 0.0;
2199   MatScalar         *aa;
2200   const PetscScalar *xx;
2201   PetscScalar       *bb;
2202 
2203   PetscFunctionBegin;
2204   /* fix right hand side if needed */
2205   if (x && b) {
2206     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2207     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2208     for (i=0; i<is_n; i++) {
2209       bb[is_idx[i]] = diag*xx[is_idx[i]];
2210     }
2211     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2212     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2213   }
2214 
2215   /* Make a copy of the IS and  sort it */
2216   /* allocate memory for rows,sizes */
2217   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2218 
2219   /* copy IS values to rows, and sort them */
2220   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2221   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2222 
2223   if (baij->keepnonzeropattern) {
2224     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2225     bs_max = is_n;
2226     A->same_nonzero = PETSC_TRUE;
2227   } else {
2228     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2229     A->same_nonzero = PETSC_FALSE;
2230   }
2231 
2232   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2233     row   = rows[j];
2234     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2235     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2236     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2237     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2238       if (diag != (PetscScalar)0.0) {
2239         if (baij->ilen[row/bs] > 0) {
2240           baij->ilen[row/bs]       = 1;
2241           baij->j[baij->i[row/bs]] = row/bs;
2242           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2243         }
2244         /* Now insert all the diagonal values for this bs */
2245         for (k=0; k<bs; k++) {
2246           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2247         }
2248       } else { /* (diag == 0.0) */
2249         baij->ilen[row/bs] = 0;
2250       } /* end (diag == 0.0) */
2251     } else { /* (sizes[i] != bs) */
2252 #if defined (PETSC_USE_DEBUG)
2253       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2254 #endif
2255       for (k=0; k<count; k++) {
2256         aa[0] =  zero;
2257         aa    += bs;
2258       }
2259       if (diag != (PetscScalar)0.0) {
2260         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2261       }
2262     }
2263   }
2264 
2265   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2266   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2272 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2273 {
2274   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2275   PetscErrorCode    ierr;
2276   PetscInt          i,j,k,count;
2277   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2278   PetscScalar       zero = 0.0;
2279   MatScalar         *aa;
2280   const PetscScalar *xx;
2281   PetscScalar       *bb;
2282   PetscBool         *zeroed,vecs = PETSC_FALSE;
2283 
2284   PetscFunctionBegin;
2285   /* fix right hand side if needed */
2286   if (x && b) {
2287     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2288     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2289     vecs = PETSC_TRUE;
2290   }
2291   A->same_nonzero = PETSC_TRUE;
2292 
2293   /* zero the columns */
2294   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2295   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2296   for (i=0; i<is_n; i++) {
2297     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2298     zeroed[is_idx[i]] = PETSC_TRUE;
2299   }
2300   for (i=0; i<A->rmap->N; i++) {
2301     if (!zeroed[i]) {
2302       row = i/bs;
2303       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2304         for (k=0; k<bs; k++) {
2305           col = bs*baij->j[j] + k;
2306           if (zeroed[col]) {
2307             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2308             if (vecs) bb[i] -= aa[0]*xx[col];
2309             aa[0] = 0.0;
2310           }
2311         }
2312       }
2313     } else if (vecs) bb[i] = diag*xx[i];
2314   }
2315   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2316   if (vecs) {
2317     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2318     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2319   }
2320 
2321   /* zero the rows */
2322   for (i=0; i<is_n; i++) {
2323     row   = is_idx[i];
2324     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2325     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2326     for (k=0; k<count; k++) {
2327       aa[0] =  zero;
2328       aa    += bs;
2329     }
2330     if (diag != (PetscScalar)0.0) {
2331       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2332     }
2333   }
2334   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2340 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2341 {
2342   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2343   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2344   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2345   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2346   PetscErrorCode ierr;
2347   PetscInt       ridx,cidx,bs2=a->bs2;
2348   PetscBool      roworiented=a->roworiented;
2349   MatScalar      *ap,value,*aa=a->a,*bap;
2350 
2351   PetscFunctionBegin;
2352   if (v) PetscValidScalarPointer(v,6);
2353   for (k=0; k<m; k++) { /* loop over added rows */
2354     row  = im[k];
2355     brow = row/bs;
2356     if (row < 0) continue;
2357 #if defined(PETSC_USE_DEBUG)
2358     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2359 #endif
2360     rp   = aj + ai[brow];
2361     ap   = aa + bs2*ai[brow];
2362     rmax = imax[brow];
2363     nrow = ailen[brow];
2364     low  = 0;
2365     high = nrow;
2366     for (l=0; l<n; l++) { /* loop over added columns */
2367       if (in[l] < 0) continue;
2368 #if defined(PETSC_USE_DEBUG)
2369       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2370 #endif
2371       col = in[l]; bcol = col/bs;
2372       ridx = row % bs; cidx = col % bs;
2373       if (roworiented) {
2374         value = v[l + k*n];
2375       } else {
2376         value = v[k + l*m];
2377       }
2378       if (col <= lastcol) low = 0; else high = nrow;
2379       lastcol = col;
2380       while (high-low > 7) {
2381         t = (low+high)/2;
2382         if (rp[t] > bcol) high = t;
2383         else              low  = t;
2384       }
2385       for (i=low; i<high; i++) {
2386         if (rp[i] > bcol) break;
2387         if (rp[i] == bcol) {
2388           bap  = ap +  bs2*i + bs*cidx + ridx;
2389           if (is == ADD_VALUES) *bap += value;
2390           else                  *bap  = value;
2391           goto noinsert1;
2392         }
2393       }
2394       if (nonew == 1) goto noinsert1;
2395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2396       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2397       N = nrow++ - 1; high++;
2398       /* shift up all the later entries in this row */
2399       for (ii=N; ii>=i; ii--) {
2400         rp[ii+1] = rp[ii];
2401         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2402       }
2403       if (N>=i) {
2404         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2405       }
2406       rp[i]                      = bcol;
2407       ap[bs2*i + bs*cidx + ridx] = value;
2408       a->nz++;
2409       noinsert1:;
2410       low = i;
2411     }
2412     ailen[brow] = nrow;
2413   }
2414   A->same_nonzero = PETSC_FALSE;
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2420 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2421 {
2422   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2423   Mat            outA;
2424   PetscErrorCode ierr;
2425   PetscBool      row_identity,col_identity;
2426 
2427   PetscFunctionBegin;
2428   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2429   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2430   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2431   if (!row_identity || !col_identity) {
2432     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2433   }
2434 
2435   outA            = inA;
2436   inA->factortype = MAT_FACTOR_LU;
2437 
2438   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2439 
2440   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2441   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2442   a->row = row;
2443   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2444   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2445   a->col = col;
2446 
2447   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2448   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2449    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2450   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2451 
2452   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2453   if (!a->solve_work) {
2454     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2455     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2456   }
2457   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2458 
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 EXTERN_C_BEGIN
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2465 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2466 {
2467   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2468   PetscInt    i,nz,mbs;
2469 
2470   PetscFunctionBegin;
2471   nz  = baij->maxnz;
2472   mbs = baij->mbs;
2473   for (i=0; i<nz; i++) {
2474     baij->j[i] = indices[i];
2475   }
2476   baij->nz = nz;
2477   for (i=0; i<mbs; i++) {
2478     baij->ilen[i] = baij->imax[i];
2479   }
2480   PetscFunctionReturn(0);
2481 }
2482 EXTERN_C_END
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2486 /*@
2487     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2488        in the matrix.
2489 
2490   Input Parameters:
2491 +  mat - the SeqBAIJ matrix
2492 -  indices - the column indices
2493 
2494   Level: advanced
2495 
2496   Notes:
2497     This can be called if you have precomputed the nonzero structure of the
2498   matrix and want to provide it to the matrix object to improve the performance
2499   of the MatSetValues() operation.
2500 
2501     You MUST have set the correct numbers of nonzeros per row in the call to
2502   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2503 
2504     MUST be called before any calls to MatSetValues();
2505 
2506 @*/
2507 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2508 {
2509   PetscErrorCode ierr;
2510 
2511   PetscFunctionBegin;
2512   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2513   PetscValidPointer(indices,2);
2514   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2520 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2521 {
2522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2523   PetscErrorCode ierr;
2524   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2525   PetscReal      atmp;
2526   PetscScalar    *x,zero = 0.0;
2527   MatScalar      *aa;
2528   PetscInt       ncols,brow,krow,kcol;
2529 
2530   PetscFunctionBegin;
2531   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2532   bs   = A->rmap->bs;
2533   aa   = a->a;
2534   ai   = a->i;
2535   aj   = a->j;
2536   mbs  = a->mbs;
2537 
2538   ierr = VecSet(v,zero);CHKERRQ(ierr);
2539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2540   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2541   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2542   for (i=0; i<mbs; i++) {
2543     ncols = ai[1] - ai[0]; ai++;
2544     brow  = bs*i;
2545     for (j=0; j<ncols; j++) {
2546       for (kcol=0; kcol<bs; kcol++) {
2547         for (krow=0; krow<bs; krow++) {
2548           atmp = PetscAbsScalar(*aa);aa++;
2549           row = brow + krow;    /* row index */
2550           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2551           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2552         }
2553       }
2554       aj++;
2555     }
2556   }
2557   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatCopy_SeqBAIJ"
2563 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2564 {
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   /* If the two matrices have the same copy implementation, use fast copy. */
2569   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2570     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2571     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2572     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2573 
2574     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2575     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2576     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2577   } else {
2578     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2579   }
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 #undef __FUNCT__
2584 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2585 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2586 {
2587   PetscErrorCode ierr;
2588 
2589   PetscFunctionBegin;
2590   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 #undef __FUNCT__
2595 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2596 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2597 {
2598   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2599 
2600   PetscFunctionBegin;
2601   *array = a->a;
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2607 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2608 {
2609   PetscFunctionBegin;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2615 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2616 {
2617   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2618   PetscErrorCode ierr;
2619   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2620   PetscBLASInt   one=1;
2621 
2622   PetscFunctionBegin;
2623   if (str == SAME_NONZERO_PATTERN) {
2624     PetscScalar alpha = a;
2625     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2626     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2627   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2628     if (y->xtoy && y->XtoY != X) {
2629       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2630       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2631     }
2632     if (!y->xtoy) { /* get xtoy */
2633       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2634       y->XtoY = X;
2635       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2636     }
2637     for (i=0; i<x->nz; i++) {
2638       j = 0;
2639       while (j < bs2) {
2640         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2641         j++;
2642       }
2643     }
2644     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2645   } else {
2646     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2647   }
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2653 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2654 {
2655   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2656   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2657   MatScalar      *aa = a->a;
2658 
2659   PetscFunctionBegin;
2660   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2666 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2667 {
2668   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2669   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2670   MatScalar      *aa = a->a;
2671 
2672   PetscFunctionBegin;
2673   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2681 /*
2682     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2683 */
2684 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2685 {
2686   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2687   PetscErrorCode ierr;
2688   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2689   PetscInt       nz = a->i[m],row,*jj,mr,col;
2690 
2691   PetscFunctionBegin;
2692   *nn = n;
2693   if (!ia) PetscFunctionReturn(0);
2694   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2695   else {
2696     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2697     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2698     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2699     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2700     jj = a->j;
2701     for (i=0; i<nz; i++) {
2702       collengths[jj[i]]++;
2703     }
2704     cia[0] = oshift;
2705     for (i=0; i<n; i++) {
2706       cia[i+1] = cia[i] + collengths[i];
2707     }
2708     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2709     jj   = a->j;
2710     for (row=0; row<m; row++) {
2711       mr = a->i[row+1] - a->i[row];
2712       for (i=0; i<mr; i++) {
2713         col = *jj++;
2714         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2715       }
2716     }
2717     ierr = PetscFree(collengths);CHKERRQ(ierr);
2718     *ia = cia; *ja = cja;
2719   }
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2725 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2726 {
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   if (!ia) PetscFunctionReturn(0);
2731   ierr = PetscFree(*ia);CHKERRQ(ierr);
2732   ierr = PetscFree(*ja);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2738 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2739 {
2740   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2741   PetscErrorCode ierr;
2742   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2743   PetscScalar    dx,*y,*xx,*w3_array;
2744   PetscScalar    *vscale_array;
2745   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2746   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2747   void           *fctx = coloring->fctx;
2748   PetscBool      flg = PETSC_FALSE;
2749   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2750   Vec            x1_tmp;
2751 
2752   PetscFunctionBegin;
2753   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2754   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2755   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2756   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2757 
2758   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2759   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2760   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2761   if (flg) {
2762     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2763   } else {
2764     PetscBool  assembled;
2765     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2766     if (assembled) {
2767       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2768     }
2769   }
2770 
2771   x1_tmp = x1;
2772   if (!coloring->vscale) {
2773     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2774   }
2775 
2776   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2777     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2778   }
2779   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2780 
2781   /* Set w1 = F(x1) */
2782   if (!coloring->fset) {
2783     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2784     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2785     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2786   } else {
2787     coloring->fset = PETSC_FALSE;
2788   }
2789 
2790   if (!coloring->w3) {
2791     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2793   }
2794   w3 = coloring->w3;
2795 
2796     CHKMEMQ;
2797     /* Compute all the local scale factors, including ghost points */
2798   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2799   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2800   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2801   if (ctype == IS_COLORING_GHOSTED) {
2802     col_start = 0; col_end = N;
2803   } else if (ctype == IS_COLORING_GLOBAL) {
2804     xx = xx - start;
2805     vscale_array = vscale_array - start;
2806     col_start = start; col_end = N + start;
2807   }    CHKMEMQ;
2808   for (col=col_start; col<col_end; col++) {
2809     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2810     if (coloring->htype[0] == 'w') {
2811       dx = 1.0 + unorm;
2812     } else {
2813       dx  = xx[col];
2814     }
2815     if (dx == (PetscScalar)0.0) dx = 1.0;
2816     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2817     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2818     dx               *= epsilon;
2819     vscale_array[col] = (PetscScalar)1.0/dx;
2820   }     CHKMEMQ;
2821   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2822   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2823   if (ctype == IS_COLORING_GLOBAL) {
2824     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2825     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2826   }
2827   CHKMEMQ;
2828   if (coloring->vscaleforrow) {
2829     vscaleforrow = coloring->vscaleforrow;
2830   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2831 
2832   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2833   /*
2834     Loop over each color
2835   */
2836   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2837   for (k=0; k<coloring->ncolors; k++) {
2838     coloring->currentcolor = k;
2839     for (i=0; i<bs; i++) {
2840       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2841       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2842       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2843       /*
2844         Loop over each column associated with color
2845         adding the perturbation to the vector w3.
2846       */
2847       for (l=0; l<coloring->ncolumns[k]; l++) {
2848         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2849         if (coloring->htype[0] == 'w') {
2850           dx = 1.0 + unorm;
2851         } else {
2852           dx  = xx[col];
2853         }
2854         if (dx == (PetscScalar)0.0) dx = 1.0;
2855         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2856         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2857         dx            *= epsilon;
2858         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2859         w3_array[col] += dx;
2860       }
2861       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2862       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2863 
2864       /*
2865         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2866         w2 = F(x1 + dx) - F(x1)
2867       */
2868       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2869       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2870       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2871       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2872 
2873       /*
2874         Loop over rows of vector, putting results into Jacobian matrix
2875       */
2876       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2877       for (l=0; l<coloring->nrows[k]; l++) {
2878         row    = bs*coloring->rows[k][l];             /* local row index */
2879         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2880         for (j=0; j<bs; j++) {
2881           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2882           srows[j]  = row + start + j;
2883         }
2884         ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2885       }
2886       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2887     }
2888   } /* endof for each color */
2889   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2890   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2892   ierr = PetscFree(srows);CHKERRQ(ierr);
2893 
2894   coloring->currentcolor = -1;
2895   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2896   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2897   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 /* -------------------------------------------------------------------*/
2902 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2903        MatGetRow_SeqBAIJ,
2904        MatRestoreRow_SeqBAIJ,
2905        MatMult_SeqBAIJ_N,
2906 /* 4*/ MatMultAdd_SeqBAIJ_N,
2907        MatMultTranspose_SeqBAIJ,
2908        MatMultTransposeAdd_SeqBAIJ,
2909        0,
2910        0,
2911        0,
2912 /*10*/ 0,
2913        MatLUFactor_SeqBAIJ,
2914        0,
2915        0,
2916        MatTranspose_SeqBAIJ,
2917 /*15*/ MatGetInfo_SeqBAIJ,
2918        MatEqual_SeqBAIJ,
2919        MatGetDiagonal_SeqBAIJ,
2920        MatDiagonalScale_SeqBAIJ,
2921        MatNorm_SeqBAIJ,
2922 /*20*/ 0,
2923        MatAssemblyEnd_SeqBAIJ,
2924        MatSetOption_SeqBAIJ,
2925        MatZeroEntries_SeqBAIJ,
2926 /*24*/ MatZeroRows_SeqBAIJ,
2927        0,
2928        0,
2929        0,
2930        0,
2931 /*29*/ MatSetUp_SeqBAIJ,
2932        0,
2933        0,
2934        0,
2935        0,
2936 /*34*/ MatDuplicate_SeqBAIJ,
2937        0,
2938        0,
2939        MatILUFactor_SeqBAIJ,
2940        0,
2941 /*39*/ MatAXPY_SeqBAIJ,
2942        MatGetSubMatrices_SeqBAIJ,
2943        MatIncreaseOverlap_SeqBAIJ,
2944        MatGetValues_SeqBAIJ,
2945        MatCopy_SeqBAIJ,
2946 /*44*/ 0,
2947        MatScale_SeqBAIJ,
2948        0,
2949        0,
2950        MatZeroRowsColumns_SeqBAIJ,
2951 /*49*/ 0,
2952        MatGetRowIJ_SeqBAIJ,
2953        MatRestoreRowIJ_SeqBAIJ,
2954        MatGetColumnIJ_SeqBAIJ,
2955        MatRestoreColumnIJ_SeqBAIJ,
2956 /*54*/ MatFDColoringCreate_SeqAIJ,
2957        0,
2958        0,
2959        0,
2960        MatSetValuesBlocked_SeqBAIJ,
2961 /*59*/ MatGetSubMatrix_SeqBAIJ,
2962        MatDestroy_SeqBAIJ,
2963        MatView_SeqBAIJ,
2964        0,
2965        0,
2966 /*64*/ 0,
2967        0,
2968        0,
2969        0,
2970        0,
2971 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2972        0,
2973        MatConvert_Basic,
2974        0,
2975        0,
2976 /*74*/ 0,
2977        MatFDColoringApply_BAIJ,
2978        0,
2979        0,
2980        0,
2981 /*79*/ 0,
2982        0,
2983        0,
2984        0,
2985        MatLoad_SeqBAIJ,
2986 /*84*/ 0,
2987        0,
2988        0,
2989        0,
2990        0,
2991 /*89*/ 0,
2992        0,
2993        0,
2994        0,
2995        0,
2996 /*94*/ 0,
2997        0,
2998        0,
2999        0,
3000        0,
3001 /*99*/0,
3002        0,
3003        0,
3004        0,
3005        0,
3006 /*104*/0,
3007        MatRealPart_SeqBAIJ,
3008        MatImaginaryPart_SeqBAIJ,
3009        0,
3010        0,
3011 /*109*/0,
3012        0,
3013        0,
3014        0,
3015        MatMissingDiagonal_SeqBAIJ,
3016 /*114*/0,
3017        0,
3018        0,
3019        0,
3020        0,
3021 /*119*/0,
3022        0,
3023        MatMultHermitianTranspose_SeqBAIJ,
3024        MatMultHermitianTransposeAdd_SeqBAIJ,
3025        0,
3026 /*124*/0,
3027        0,
3028        MatInvertBlockDiagonal_SeqBAIJ
3029 };
3030 
3031 EXTERN_C_BEGIN
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3034 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3035 {
3036   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3037   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3038   PetscErrorCode ierr;
3039 
3040   PetscFunctionBegin;
3041   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3042 
3043   /* allocate space for values if not already there */
3044   if (!aij->saved_values) {
3045     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3046     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3047   }
3048 
3049   /* copy values over */
3050   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 EXTERN_C_END
3054 
3055 EXTERN_C_BEGIN
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3058 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3059 {
3060   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3061   PetscErrorCode ierr;
3062   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3063 
3064   PetscFunctionBegin;
3065   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3066   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3067 
3068   /* copy values over */
3069   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 EXTERN_C_END
3073 
3074 EXTERN_C_BEGIN
3075 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3076 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3077 EXTERN_C_END
3078 
3079 EXTERN_C_BEGIN
3080 #undef __FUNCT__
3081 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3082 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3083 {
3084   Mat_SeqBAIJ    *b;
3085   PetscErrorCode ierr;
3086   PetscInt       i,mbs,nbs,bs2;
3087   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3088 
3089   PetscFunctionBegin;
3090   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3091   if (nz == MAT_SKIP_ALLOCATION) {
3092     skipallocation = PETSC_TRUE;
3093     nz             = 0;
3094   }
3095 
3096   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3097   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3098   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3099   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3100   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3101 
3102   B->preallocated = PETSC_TRUE;
3103 
3104   mbs  = B->rmap->n/bs;
3105   nbs  = B->cmap->n/bs;
3106   bs2  = bs*bs;
3107 
3108   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3109 
3110   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3111   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3112   if (nnz) {
3113     for (i=0; i<mbs; i++) {
3114       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3115       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3116     }
3117   }
3118 
3119   b       = (Mat_SeqBAIJ*)B->data;
3120   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3121     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3122   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3123 
3124   if (!flg) {
3125     switch (bs) {
3126     case 1:
3127       B->ops->mult            = MatMult_SeqBAIJ_1;
3128       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3129       B->ops->sor             = MatSOR_SeqBAIJ_1;
3130       break;
3131     case 2:
3132       B->ops->mult            = MatMult_SeqBAIJ_2;
3133       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3134       B->ops->sor             = MatSOR_SeqBAIJ_2;
3135       break;
3136     case 3:
3137       B->ops->mult            = MatMult_SeqBAIJ_3;
3138       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3139       B->ops->sor             = MatSOR_SeqBAIJ_3;
3140       break;
3141     case 4:
3142       B->ops->mult            = MatMult_SeqBAIJ_4;
3143       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3144       B->ops->sor             = MatSOR_SeqBAIJ_4;
3145       break;
3146     case 5:
3147       B->ops->mult            = MatMult_SeqBAIJ_5;
3148       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3149       B->ops->sor             = MatSOR_SeqBAIJ_5;
3150       break;
3151     case 6:
3152       B->ops->mult            = MatMult_SeqBAIJ_6;
3153       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3154       B->ops->sor             = MatSOR_SeqBAIJ_6;
3155       break;
3156     case 7:
3157       B->ops->mult            = MatMult_SeqBAIJ_7;
3158       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3159       B->ops->sor             = MatSOR_SeqBAIJ_7;
3160       break;
3161     case 15:
3162       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3163       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3164       B->ops->sor             = MatSOR_SeqBAIJ_N;
3165       break;
3166     default:
3167       B->ops->mult            = MatMult_SeqBAIJ_N;
3168       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3169       B->ops->sor             = MatSOR_SeqBAIJ_N;
3170       break;
3171     }
3172   }
3173   b->mbs       = mbs;
3174   b->nbs       = nbs;
3175   if (!skipallocation) {
3176     if (!b->imax) {
3177       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3178       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3179       b->free_imax_ilen = PETSC_TRUE;
3180     }
3181     /* b->ilen will count nonzeros in each block row so far. */
3182     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3183     if (!nnz) {
3184       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3185       else if (nz < 0) nz = 1;
3186       for (i=0; i<mbs; i++) b->imax[i] = nz;
3187       nz = nz*mbs;
3188     } else {
3189       nz = 0;
3190       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3191     }
3192 
3193     /* allocate the matrix space */
3194     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3195     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3196     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3197     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3198     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3199     b->singlemalloc = PETSC_TRUE;
3200     b->i[0] = 0;
3201     for (i=1; i<mbs+1; i++) {
3202       b->i[i] = b->i[i-1] + b->imax[i-1];
3203     }
3204     b->free_a     = PETSC_TRUE;
3205     b->free_ij    = PETSC_TRUE;
3206   } else {
3207     b->free_a     = PETSC_FALSE;
3208     b->free_ij    = PETSC_FALSE;
3209   }
3210 
3211   b->bs2              = bs2;
3212   b->mbs              = mbs;
3213   b->nz               = 0;
3214   b->maxnz            = nz;
3215   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3216   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3217   PetscFunctionReturn(0);
3218 }
3219 EXTERN_C_END
3220 
3221 EXTERN_C_BEGIN
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3224 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3225 {
3226   PetscInt       i,m,nz,nz_max=0,*nnz;
3227   PetscScalar    *values=0;
3228   PetscErrorCode ierr;
3229 
3230   PetscFunctionBegin;
3231   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3232   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3233   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3234   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3235   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3236   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3237   m = B->rmap->n/bs;
3238 
3239   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3240   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3241   for (i=0; i<m; i++) {
3242     nz = ii[i+1]- ii[i];
3243     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3244     nz_max = PetscMax(nz_max, nz);
3245     nnz[i] = nz;
3246   }
3247   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3248   ierr = PetscFree(nnz);CHKERRQ(ierr);
3249 
3250   values = (PetscScalar*)V;
3251   if (!values) {
3252     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3253     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3254   }
3255   for (i=0; i<m; i++) {
3256     PetscInt          ncols  = ii[i+1] - ii[i];
3257     const PetscInt    *icols = jj + ii[i];
3258     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3259     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3260   }
3261   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3262   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 EXTERN_C_END
3268 
3269 
3270 EXTERN_C_BEGIN
3271 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3272 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3273 #if defined(PETSC_HAVE_MUMPS)
3274 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3275 #endif
3276 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3277 EXTERN_C_END
3278 
3279 /*MC
3280    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3281    block sparse compressed row format.
3282 
3283    Options Database Keys:
3284 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3285 
3286   Level: beginner
3287 
3288 .seealso: MatCreateSeqBAIJ()
3289 M*/
3290 
3291 EXTERN_C_BEGIN
3292 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3293 EXTERN_C_END
3294 
3295 EXTERN_C_BEGIN
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatCreate_SeqBAIJ"
3298 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3299 {
3300   PetscErrorCode ierr;
3301   PetscMPIInt    size;
3302   Mat_SeqBAIJ    *b;
3303 
3304   PetscFunctionBegin;
3305   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3306   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3307 
3308   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3309   B->data = (void*)b;
3310   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3311   b->row                   = 0;
3312   b->col                   = 0;
3313   b->icol                  = 0;
3314   b->reallocs              = 0;
3315   b->saved_values          = 0;
3316 
3317   b->roworiented           = PETSC_TRUE;
3318   b->nonew                 = 0;
3319   b->diag                  = 0;
3320   b->solve_work            = 0;
3321   b->mult_work             = 0;
3322   B->spptr                 = 0;
3323   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3324   b->keepnonzeropattern    = PETSC_FALSE;
3325   b->xtoy                  = 0;
3326   b->XtoY                  = 0;
3327   B->same_nonzero          = PETSC_FALSE;
3328 
3329   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3330                                      "MatGetFactorAvailable_seqbaij_petsc",
3331                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3333                                      "MatGetFactor_seqbaij_petsc",
3334                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3335   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3336                                      "MatGetFactor_seqbaij_bstrm",
3337                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3338 #if defined(PETSC_HAVE_MUMPS)
3339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3340 #endif
3341   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3342                                      "MatInvertBlockDiagonal_SeqBAIJ",
3343                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3345                                      "MatStoreValues_SeqBAIJ",
3346                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3348                                      "MatRetrieveValues_SeqBAIJ",
3349                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3350   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3351                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3352                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3354                                      "MatConvert_SeqBAIJ_SeqAIJ",
3355                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3357                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3358                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3360                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3361                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3363                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3364                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3366                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3367                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3369                                      "MatIsTranspose_SeqBAIJ",
3370                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3371   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3372   PetscFunctionReturn(0);
3373 }
3374 EXTERN_C_END
3375 
3376 #undef __FUNCT__
3377 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3378 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3379 {
3380   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3381   PetscErrorCode ierr;
3382   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3383 
3384   PetscFunctionBegin;
3385   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3386 
3387   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3388     c->imax = a->imax;
3389     c->ilen = a->ilen;
3390     c->free_imax_ilen = PETSC_FALSE;
3391   } else {
3392     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3393     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3394     for (i=0; i<mbs; i++) {
3395       c->imax[i] = a->imax[i];
3396       c->ilen[i] = a->ilen[i];
3397     }
3398     c->free_imax_ilen = PETSC_TRUE;
3399   }
3400 
3401   /* allocate the matrix space */
3402   if (mallocmatspace) {
3403     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3404       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3405       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3406       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3407       c->i            = a->i;
3408       c->j            = a->j;
3409       c->singlemalloc = PETSC_FALSE;
3410       c->free_a       = PETSC_TRUE;
3411       c->free_ij      = PETSC_FALSE;
3412       c->parent       = A;
3413       C->preallocated = PETSC_TRUE;
3414       C->assembled    = PETSC_TRUE;
3415       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3416       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3417       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3418     } else {
3419       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3420       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3421       c->singlemalloc = PETSC_TRUE;
3422       c->free_a       = PETSC_TRUE;
3423       c->free_ij      = PETSC_TRUE;
3424       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3425       if (mbs > 0) {
3426         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3427         if (cpvalues == MAT_COPY_VALUES) {
3428           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3429         } else {
3430           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3431         }
3432       }
3433       C->preallocated = PETSC_TRUE;
3434       C->assembled    = PETSC_TRUE;
3435     }
3436   }
3437 
3438   c->roworiented = a->roworiented;
3439   c->nonew       = a->nonew;
3440   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3441   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3442   c->bs2         = a->bs2;
3443   c->mbs         = a->mbs;
3444   c->nbs         = a->nbs;
3445 
3446   if (a->diag) {
3447     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3448       c->diag      = a->diag;
3449       c->free_diag = PETSC_FALSE;
3450     } else {
3451       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3452       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3453       for (i=0; i<mbs; i++) {
3454         c->diag[i] = a->diag[i];
3455       }
3456       c->free_diag = PETSC_TRUE;
3457     }
3458   } else c->diag        = 0;
3459   c->nz                 = a->nz;
3460   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3461   c->solve_work         = 0;
3462   c->mult_work          = 0;
3463 
3464   c->compressedrow.use     = a->compressedrow.use;
3465   c->compressedrow.nrows   = a->compressedrow.nrows;
3466   c->compressedrow.check   = a->compressedrow.check;
3467   if (a->compressedrow.use) {
3468     i = a->compressedrow.nrows;
3469     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3470     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3471     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3472     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3473   } else {
3474     c->compressedrow.use    = PETSC_FALSE;
3475     c->compressedrow.i      = PETSC_NULL;
3476     c->compressedrow.rindex = PETSC_NULL;
3477   }
3478   C->same_nonzero = A->same_nonzero;
3479   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3480   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3481   PetscFunctionReturn(0);
3482 }
3483 
3484 #undef __FUNCT__
3485 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3486 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3487 {
3488   PetscErrorCode ierr;
3489 
3490   PetscFunctionBegin;
3491   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3492   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3493   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3494   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatLoad_SeqBAIJ"
3500 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3501 {
3502   Mat_SeqBAIJ    *a;
3503   PetscErrorCode ierr;
3504   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3505   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3506   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3507   PetscInt       *masked,nmask,tmp,bs2,ishift;
3508   PetscMPIInt    size;
3509   int            fd;
3510   PetscScalar    *aa;
3511   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3512 
3513   PetscFunctionBegin;
3514   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3515   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3516   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3517   bs2  = bs*bs;
3518 
3519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3520   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3521   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3522   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3523   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3524   M = header[1]; N = header[2]; nz = header[3];
3525 
3526   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3527   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3528 
3529   /*
3530      This code adds extra rows to make sure the number of rows is
3531     divisible by the blocksize
3532   */
3533   mbs        = M/bs;
3534   extra_rows = bs - M + bs*(mbs);
3535   if (extra_rows == bs) extra_rows = 0;
3536   else                  mbs++;
3537   if (extra_rows) {
3538     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3539   }
3540 
3541   /* Set global sizes if not already set */
3542   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3543     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3544   } else { /* Check if the matrix global sizes are correct */
3545     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3546     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3547       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3548     }
3549     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3550   }
3551 
3552   /* read in row lengths */
3553   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3554   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3555   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3556 
3557   /* read in column indices */
3558   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3559   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3560   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3561 
3562   /* loop over row lengths determining block row lengths */
3563   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3564   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3565   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3566   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3567   rowcount = 0;
3568   nzcount = 0;
3569   for (i=0; i<mbs; i++) {
3570     nmask = 0;
3571     for (j=0; j<bs; j++) {
3572       kmax = rowlengths[rowcount];
3573       for (k=0; k<kmax; k++) {
3574         tmp = jj[nzcount++]/bs;
3575         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3576       }
3577       rowcount++;
3578     }
3579     browlengths[i] += nmask;
3580     /* zero out the mask elements we set */
3581     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3582   }
3583 
3584   /* Do preallocation  */
3585   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3586   a = (Mat_SeqBAIJ*)newmat->data;
3587 
3588   /* set matrix "i" values */
3589   a->i[0] = 0;
3590   for (i=1; i<= mbs; i++) {
3591     a->i[i]      = a->i[i-1] + browlengths[i-1];
3592     a->ilen[i-1] = browlengths[i-1];
3593   }
3594   a->nz         = 0;
3595   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3596 
3597   /* read in nonzero values */
3598   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3599   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3600   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3601 
3602   /* set "a" and "j" values into matrix */
3603   nzcount = 0; jcount = 0;
3604   for (i=0; i<mbs; i++) {
3605     nzcountb = nzcount;
3606     nmask    = 0;
3607     for (j=0; j<bs; j++) {
3608       kmax = rowlengths[i*bs+j];
3609       for (k=0; k<kmax; k++) {
3610         tmp = jj[nzcount++]/bs;
3611         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3612       }
3613     }
3614     /* sort the masked values */
3615     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3616 
3617     /* set "j" values into matrix */
3618     maskcount = 1;
3619     for (j=0; j<nmask; j++) {
3620       a->j[jcount++]  = masked[j];
3621       mask[masked[j]] = maskcount++;
3622     }
3623     /* set "a" values into matrix */
3624     ishift = bs2*a->i[i];
3625     for (j=0; j<bs; j++) {
3626       kmax = rowlengths[i*bs+j];
3627       for (k=0; k<kmax; k++) {
3628         tmp       = jj[nzcountb]/bs ;
3629         block     = mask[tmp] - 1;
3630         point     = jj[nzcountb] - bs*tmp;
3631         idx       = ishift + bs2*block + j + bs*point;
3632         a->a[idx] = (MatScalar)aa[nzcountb++];
3633       }
3634     }
3635     /* zero out the mask elements we set */
3636     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3637   }
3638   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3639 
3640   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3641   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3642   ierr = PetscFree(aa);CHKERRQ(ierr);
3643   ierr = PetscFree(jj);CHKERRQ(ierr);
3644   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3645 
3646   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3648   PetscFunctionReturn(0);
3649 }
3650 
3651 #undef __FUNCT__
3652 #define __FUNCT__ "MatCreateSeqBAIJ"
3653 /*@C
3654    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3655    compressed row) format.  For good matrix assembly performance the
3656    user should preallocate the matrix storage by setting the parameter nz
3657    (or the array nnz).  By setting these parameters accurately, performance
3658    during matrix assembly can be increased by more than a factor of 50.
3659 
3660    Collective on MPI_Comm
3661 
3662    Input Parameters:
3663 +  comm - MPI communicator, set to PETSC_COMM_SELF
3664 .  bs - size of block
3665 .  m - number of rows
3666 .  n - number of columns
3667 .  nz - number of nonzero blocks  per block row (same for all rows)
3668 -  nnz - array containing the number of nonzero blocks in the various block rows
3669          (possibly different for each block row) or PETSC_NULL
3670 
3671    Output Parameter:
3672 .  A - the matrix
3673 
3674    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3675    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3676    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3677 
3678    Options Database Keys:
3679 .   -mat_no_unroll - uses code that does not unroll the loops in the
3680                      block calculations (much slower)
3681 .    -mat_block_size - size of the blocks to use
3682 
3683    Level: intermediate
3684 
3685    Notes:
3686    The number of rows and columns must be divisible by blocksize.
3687 
3688    If the nnz parameter is given then the nz parameter is ignored
3689 
3690    A nonzero block is any block that as 1 or more nonzeros in it
3691 
3692    The block AIJ format is fully compatible with standard Fortran 77
3693    storage.  That is, the stored row and column indices can begin at
3694    either one (as in Fortran) or zero.  See the users' manual for details.
3695 
3696    Specify the preallocated storage with either nz or nnz (not both).
3697    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3698    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3699    matrices.
3700 
3701 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3702 @*/
3703 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3704 {
3705   PetscErrorCode ierr;
3706 
3707   PetscFunctionBegin;
3708   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3709   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3710   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3711   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3712   PetscFunctionReturn(0);
3713 }
3714 
3715 #undef __FUNCT__
3716 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3717 /*@C
3718    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3719    per row in the matrix. For good matrix assembly performance the
3720    user should preallocate the matrix storage by setting the parameter nz
3721    (or the array nnz).  By setting these parameters accurately, performance
3722    during matrix assembly can be increased by more than a factor of 50.
3723 
3724    Collective on MPI_Comm
3725 
3726    Input Parameters:
3727 +  A - the matrix
3728 .  bs - size of block
3729 .  nz - number of block nonzeros per block row (same for all rows)
3730 -  nnz - array containing the number of block nonzeros in the various block rows
3731          (possibly different for each block row) or PETSC_NULL
3732 
3733    Options Database Keys:
3734 .   -mat_no_unroll - uses code that does not unroll the loops in the
3735                      block calculations (much slower)
3736 .    -mat_block_size - size of the blocks to use
3737 
3738    Level: intermediate
3739 
3740    Notes:
3741    If the nnz parameter is given then the nz parameter is ignored
3742 
3743    You can call MatGetInfo() to get information on how effective the preallocation was;
3744    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3745    You can also run with the option -info and look for messages with the string
3746    malloc in them to see if additional memory allocation was needed.
3747 
3748    The block AIJ format is fully compatible with standard Fortran 77
3749    storage.  That is, the stored row and column indices can begin at
3750    either one (as in Fortran) or zero.  See the users' manual for details.
3751 
3752    Specify the preallocated storage with either nz or nnz (not both).
3753    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3754    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3755 
3756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3757 @*/
3758 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3759 {
3760   PetscErrorCode ierr;
3761 
3762   PetscFunctionBegin;
3763   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3764   PetscValidType(B,1);
3765   PetscValidLogicalCollectiveInt(B,bs,2);
3766   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 #undef __FUNCT__
3771 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3772 /*@C
3773    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3774    (the default sequential PETSc format).
3775 
3776    Collective on MPI_Comm
3777 
3778    Input Parameters:
3779 +  A - the matrix
3780 .  i - the indices into j for the start of each local row (starts with zero)
3781 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3782 -  v - optional values in the matrix
3783 
3784    Level: developer
3785 
3786 .keywords: matrix, aij, compressed row, sparse
3787 
3788 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3789 @*/
3790 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3791 {
3792   PetscErrorCode ierr;
3793 
3794   PetscFunctionBegin;
3795   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3796   PetscValidType(B,1);
3797   PetscValidLogicalCollectiveInt(B,bs,2);
3798   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 
3803 #undef __FUNCT__
3804 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3805 /*@
3806      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3807 
3808      Collective on MPI_Comm
3809 
3810    Input Parameters:
3811 +  comm - must be an MPI communicator of size 1
3812 .  bs - size of block
3813 .  m - number of rows
3814 .  n - number of columns
3815 .  i - row indices
3816 .  j - column indices
3817 -  a - matrix values
3818 
3819    Output Parameter:
3820 .  mat - the matrix
3821 
3822    Level: advanced
3823 
3824    Notes:
3825        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3826     once the matrix is destroyed
3827 
3828        You cannot set new nonzero locations into this matrix, that will generate an error.
3829 
3830        The i and j indices are 0 based
3831 
3832        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3833 
3834 
3835 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3836 
3837 @*/
3838 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3839 {
3840   PetscErrorCode ierr;
3841   PetscInt       ii;
3842   Mat_SeqBAIJ    *baij;
3843 
3844   PetscFunctionBegin;
3845   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3846   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3847 
3848   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3849   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3850   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3851   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3852   baij = (Mat_SeqBAIJ*)(*mat)->data;
3853   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3854   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3855 
3856   baij->i = i;
3857   baij->j = j;
3858   baij->a = a;
3859   baij->singlemalloc = PETSC_FALSE;
3860   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3861   baij->free_a       = PETSC_FALSE;
3862   baij->free_ij      = PETSC_FALSE;
3863 
3864   for (ii=0; ii<m; ii++) {
3865     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3866 #if defined(PETSC_USE_DEBUG)
3867     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3868 #endif
3869   }
3870 #if defined(PETSC_USE_DEBUG)
3871   for (ii=0; ii<baij->i[m]; ii++) {
3872     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3873     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3874   }
3875 #endif
3876 
3877   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   PetscFunctionReturn(0);
3880 }
3881