xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c10200c1442b553b7ad65c70101560db4fa22e78)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   allowzeropivot = PetscNot(A->erroriffailure);
24 
25   if (a->idiagvalid) {
26     if (values) *values = a->idiag;
27     PetscFunctionReturn(0);
28   }
29   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
30   diag_offset = a->diag;
31   if (!a->idiag) {
32     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
33     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
34   }
35   diag  = a->idiag;
36   mdiag = a->idiag+bs2*mbs;
37   if (values) *values = a->idiag;
38   /* factor and invert each block */
39   switch (bs) {
40   case 1:
41     for (i=0; i<mbs; i++) {
42       odiag    = v + 1*diag_offset[i];
43       diag[0]  = odiag[0];
44       mdiag[0] = odiag[0];
45 
46       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
47         if (allowzeropivot) {
48           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
49           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
50           A->factorerror_zeropivot_row   = i;
51           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
52         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
53       }
54 
55       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
56       diag    += 1;
57       mdiag   += 1;
58     }
59     break;
60   case 2:
61     for (i=0; i<mbs; i++) {
62       odiag    = v + 4*diag_offset[i];
63       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
64       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
65       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
66       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
67       diag    += 4;
68       mdiag   += 4;
69     }
70     break;
71   case 3:
72     for (i=0; i<mbs; i++) {
73       odiag    = v + 9*diag_offset[i];
74       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
75       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
76       diag[8]  = odiag[8];
77       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
78       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
79       mdiag[8] = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83       mdiag   += 9;
84     }
85     break;
86   case 4:
87     for (i=0; i<mbs; i++) {
88       odiag  = v + 16*diag_offset[i];
89       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
90       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
91       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
92       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
93       diag  += 16;
94       mdiag += 16;
95     }
96     break;
97   case 5:
98     for (i=0; i<mbs; i++) {
99       odiag  = v + 25*diag_offset[i];
100       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
101       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
103       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
104       diag  += 25;
105       mdiag += 25;
106     }
107     break;
108   case 6:
109     for (i=0; i<mbs; i++) {
110       odiag  = v + 36*diag_offset[i];
111       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
112       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
113       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
114       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
115       diag  += 36;
116       mdiag += 36;
117     }
118     break;
119   case 7:
120     for (i=0; i<mbs; i++) {
121       odiag  = v + 49*diag_offset[i];
122       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
123       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
124       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
125       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126       diag  += 49;
127       mdiag += 49;
128     }
129     break;
130   default:
131     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
132     for (i=0; i<mbs; i++) {
133       odiag  = v + bs2*diag_offset[i];
134       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
135       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
136       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
137       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
138       diag  += bs2;
139       mdiag += bs2;
140     }
141     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
142   }
143   a->idiagvalid = PETSC_TRUE;
144   PetscFunctionReturn(0);
145 }
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "MatSOR_SeqBAIJ"
149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150 {
151   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152   PetscScalar       *x,*work,*w,*workt,*t;
153   const MatScalar   *v,*aa = a->a, *idiag;
154   const PetscScalar *b,*xb;
155   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156   PetscErrorCode    ierr;
157   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162   its = its*lits;
163   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168 
169   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
170 
171   if (!m) PetscFunctionReturn(0);
172   diag  = a->diag;
173   idiag = a->idiag;
174   k    = PetscMax(A->rmap->n,A->cmap->n);
175   if (!a->mult_work) {
176     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
177   }
178   if (!a->sor_workt) {
179     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
180   }
181   if (!a->sor_work) {
182     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
183   }
184   work = a->mult_work;
185   t    = a->sor_workt;
186   w    = a->sor_work;
187 
188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
190 
191   if (flag & SOR_ZERO_INITIAL_GUESS) {
192     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193       switch (bs) {
194       case 1:
195         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196         t[0] = b[0];
197         i2     = 1;
198         idiag += 1;
199         for (i=1; i<m; i++) {
200           v  = aa + ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2];
204           for (j=0; j<nz; j++) {
205             xw[0] = x[vi[j]];
206             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207           }
208           t[i2] = s[0];
209           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210           x[i2]  = xw[0];
211           idiag += 1;
212           i2    += 1;
213         }
214         break;
215       case 2:
216         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217         t[0] = b[0]; t[1] = b[1];
218         i2     = 2;
219         idiag += 4;
220         for (i=1; i<m; i++) {
221           v  = aa + 4*ai[i];
222           vi = aj + ai[i];
223           nz = diag[i] - ai[i];
224           s[0] = b[i2]; s[1] = b[i2+1];
225           for (j=0; j<nz; j++) {
226             idx = 2*vi[j];
227             it  = 4*j;
228             xw[0] = x[idx]; xw[1] = x[1+idx];
229             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230           }
231           t[i2] = s[0]; t[i2+1] = s[1];
232           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233           x[i2]   = xw[0]; x[i2+1] = xw[1];
234           idiag  += 4;
235           i2     += 2;
236         }
237         break;
238       case 3:
239         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241         i2     = 3;
242         idiag += 9;
243         for (i=1; i<m; i++) {
244           v  = aa + 9*ai[i];
245           vi = aj + ai[i];
246           nz = diag[i] - ai[i];
247           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248           while (nz--) {
249             idx = 3*(*vi++);
250             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252             v  += 9;
253           }
254           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257           idiag  += 9;
258           i2     += 3;
259         }
260         break;
261       case 4:
262         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264         i2     = 4;
265         idiag += 16;
266         for (i=1; i<m; i++) {
267           v  = aa + 16*ai[i];
268           vi = aj + ai[i];
269           nz = diag[i] - ai[i];
270           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271           while (nz--) {
272             idx = 4*(*vi++);
273             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275             v  += 16;
276           }
277           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280           idiag  += 16;
281           i2     += 4;
282         }
283         break;
284       case 5:
285         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287         i2     = 5;
288         idiag += 25;
289         for (i=1; i<m; i++) {
290           v  = aa + 25*ai[i];
291           vi = aj + ai[i];
292           nz = diag[i] - ai[i];
293           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294           while (nz--) {
295             idx = 5*(*vi++);
296             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298             v  += 25;
299           }
300           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303           idiag  += 25;
304           i2     += 5;
305         }
306         break;
307       case 6:
308         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310         i2     = 6;
311         idiag += 36;
312         for (i=1; i<m; i++) {
313           v  = aa + 36*ai[i];
314           vi = aj + ai[i];
315           nz = diag[i] - ai[i];
316           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317           while (nz--) {
318             idx = 6*(*vi++);
319             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322             v  += 36;
323           }
324           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328           idiag  += 36;
329           i2     += 6;
330         }
331         break;
332       case 7:
333         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336         i2     = 7;
337         idiag += 49;
338         for (i=1; i<m; i++) {
339           v  = aa + 49*ai[i];
340           vi = aj + ai[i];
341           nz = diag[i] - ai[i];
342           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344           while (nz--) {
345             idx = 7*(*vi++);
346             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349             v  += 49;
350           }
351           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356           idiag  += 49;
357           i2     += 7;
358         }
359         break;
360       default:
361         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363         i2     = bs;
364         idiag += bs2;
365         for (i=1; i<m; i++) {
366           v  = aa + bs2*ai[i];
367           vi = aj + ai[i];
368           nz = diag[i] - ai[i];
369 
370           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
371           /* copy all rows of x that are needed into contiguous space */
372           workt = work;
373           for (j=0; j<nz; j++) {
374             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
375             workt += bs;
376           }
377           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
379           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
380 
381           idiag += bs2;
382           i2    += bs;
383         }
384         break;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
388       xb = t;
389     }
390     else xb = b;
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag = a->idiag+bs2*(a->mbs-1);
393       i2 = bs * (m-1);
394       switch (bs) {
395       case 1:
396         s[0]  = xb[i2];
397         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398         x[i2] = xw[0];
399         i2   -= 1;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + (diag[i]+1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2];
405           for (j=0; j<nz; j++) {
406             xw[0] = x[vi[j]];
407             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408           }
409           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410           x[i2]  = xw[0];
411           idiag -= 1;
412           i2    -= 1;
413         }
414         break;
415       case 2:
416         s[0]  = xb[i2]; s[1] = xb[i2+1];
417         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418         x[i2] = xw[0]; x[i2+1] = xw[1];
419         i2    -= 2;
420         idiag -= 4;
421         for (i=m-2; i>=0; i--) {
422           v  = aa + 4*(diag[i] + 1);
423           vi = aj + diag[i] + 1;
424           nz = ai[i+1] - diag[i] - 1;
425           s[0] = xb[i2]; s[1] = xb[i2+1];
426           for (j=0; j<nz; j++) {
427             idx = 2*vi[j];
428             it  = 4*j;
429             xw[0] = x[idx]; xw[1] = x[1+idx];
430             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431           }
432           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433           x[i2]   = xw[0]; x[i2+1] = xw[1];
434           idiag  -= 4;
435           i2     -= 2;
436         }
437         break;
438       case 3:
439         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442         i2    -= 3;
443         idiag -= 9;
444         for (i=m-2; i>=0; i--) {
445           v  = aa + 9*(diag[i]+1);
446           vi = aj + diag[i] + 1;
447           nz = ai[i+1] - diag[i] - 1;
448           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449           while (nz--) {
450             idx = 3*(*vi++);
451             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453             v  += 9;
454           }
455           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457           idiag  -= 9;
458           i2     -= 3;
459         }
460         break;
461       case 4:
462         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465         i2    -= 4;
466         idiag -= 16;
467         for (i=m-2; i>=0; i--) {
468           v  = aa + 16*(diag[i]+1);
469           vi = aj + diag[i] + 1;
470           nz = ai[i+1] - diag[i] - 1;
471           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472           while (nz--) {
473             idx = 4*(*vi++);
474             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476             v  += 16;
477           }
478           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480           idiag  -= 16;
481           i2     -= 4;
482         }
483         break;
484       case 5:
485         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488         i2    -= 5;
489         idiag -= 25;
490         for (i=m-2; i>=0; i--) {
491           v  = aa + 25*(diag[i]+1);
492           vi = aj + diag[i] + 1;
493           nz = ai[i+1] - diag[i] - 1;
494           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495           while (nz--) {
496             idx = 5*(*vi++);
497             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499             v  += 25;
500           }
501           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503           idiag  -= 25;
504           i2     -= 5;
505         }
506         break;
507       case 6:
508         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511         i2    -= 6;
512         idiag -= 36;
513         for (i=m-2; i>=0; i--) {
514           v  = aa + 36*(diag[i]+1);
515           vi = aj + diag[i] + 1;
516           nz = ai[i+1] - diag[i] - 1;
517           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518           while (nz--) {
519             idx = 6*(*vi++);
520             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523             v  += 36;
524           }
525           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527           idiag  -= 36;
528           i2     -= 6;
529         }
530         break;
531       case 7:
532         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537         i2    -= 7;
538         idiag -= 49;
539         for (i=m-2; i>=0; i--) {
540           v  = aa + 49*(diag[i]+1);
541           vi = aj + diag[i] + 1;
542           nz = ai[i+1] - diag[i] - 1;
543           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545           while (nz--) {
546             idx = 7*(*vi++);
547             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550             v  += 49;
551           }
552           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555           idiag  -= 49;
556           i2     -= 7;
557         }
558         break;
559       default:
560         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
561         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562         i2    -= bs;
563         idiag -= bs2;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + bs2*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568 
569           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
570           /* copy all rows of x that are needed into contiguous space */
571           workt = work;
572           for (j=0; j<nz; j++) {
573             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
574             workt += bs;
575           }
576           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
578 
579           idiag -= bs2;
580           i2    -= bs;
581         }
582         break;
583       }
584       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
585     }
586     its--;
587   }
588   while (its--) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590       idiag = a->idiag;
591       i2 = 0;
592       switch (bs) {
593       case 1:
594         for (i=0; i<m; i++) {
595           v  = aa + ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2];
599           for (j=0; j<nz; j++) {
600             xw[0] = x[vi[j]];
601             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602           }
603           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604           x[i2] += xw[0];
605           idiag += 1;
606           i2    += 1;
607         }
608         break;
609       case 2:
610         for (i=0; i<m; i++) {
611           v  = aa + 4*ai[i];
612           vi = aj + ai[i];
613           nz = ai[i+1] - ai[i];
614           s[0] = b[i2]; s[1] = b[i2+1];
615           for (j=0; j<nz; j++) {
616             idx = 2*vi[j];
617             it  = 4*j;
618             xw[0] = x[idx]; xw[1] = x[1+idx];
619             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620           }
621           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622           x[i2]  += xw[0]; x[i2+1] += xw[1];
623           idiag  += 4;
624           i2     += 2;
625         }
626         break;
627       case 3:
628         for (i=0; i<m; i++) {
629           v  = aa + 9*ai[i];
630           vi = aj + ai[i];
631           nz = ai[i+1] - ai[i];
632           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633           while (nz--) {
634             idx = 3*(*vi++);
635             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637             v  += 9;
638           }
639           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641           idiag  += 9;
642           i2     += 3;
643         }
644         break;
645       case 4:
646         for (i=0; i<m; i++) {
647           v  = aa + 16*ai[i];
648           vi = aj + ai[i];
649           nz = ai[i+1] - ai[i];
650           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651           while (nz--) {
652             idx = 4*(*vi++);
653             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655             v  += 16;
656           }
657           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659           idiag  += 16;
660           i2     += 4;
661         }
662         break;
663       case 5:
664         for (i=0; i<m; i++) {
665           v  = aa + 25*ai[i];
666           vi = aj + ai[i];
667           nz = ai[i+1] - ai[i];
668           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669           while (nz--) {
670             idx = 5*(*vi++);
671             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673             v  += 25;
674           }
675           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677           idiag  += 25;
678           i2     += 5;
679         }
680         break;
681       case 6:
682         for (i=0; i<m; i++) {
683           v  = aa + 36*ai[i];
684           vi = aj + ai[i];
685           nz = ai[i+1] - ai[i];
686           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687           while (nz--) {
688             idx = 6*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692             v  += 36;
693           }
694           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697           idiag  += 36;
698           i2     += 6;
699         }
700         break;
701       case 7:
702         for (i=0; i<m; i++) {
703           v  = aa + 49*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708           while (nz--) {
709             idx = 7*(*vi++);
710             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713             v  += 49;
714           }
715           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718           idiag  += 49;
719           i2     += 7;
720         }
721         break;
722       default:
723         for (i=0; i<m; i++) {
724           v  = aa + bs2*ai[i];
725           vi = aj + ai[i];
726           nz = ai[i+1] - ai[i];
727 
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729           /* copy all rows of x that are needed into contiguous space */
730           workt = work;
731           for (j=0; j<nz; j++) {
732             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
733             workt += bs;
734           }
735           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
737 
738           idiag += bs2;
739           i2    += bs;
740         }
741         break;
742       }
743       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
744     }
745     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746       idiag = a->idiag+bs2*(a->mbs-1);
747       i2 = bs * (m-1);
748       switch (bs) {
749       case 1:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];
755           for (j=0; j<nz; j++) {
756             xw[0] = x[vi[j]];
757             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758           }
759           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760           x[i2] += xw[0];
761           idiag -= 1;
762           i2    -= 1;
763         }
764         break;
765       case 2:
766         for (i=m-1; i>=0; i--) {
767           v  = aa + 4*ai[i];
768           vi = aj + ai[i];
769           nz = ai[i+1] - ai[i];
770           s[0] = b[i2]; s[1] = b[i2+1];
771           for (j=0; j<nz; j++) {
772             idx = 2*vi[j];
773             it  = 4*j;
774             xw[0] = x[idx]; xw[1] = x[1+idx];
775             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776           }
777           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778           x[i2]  += xw[0]; x[i2+1] += xw[1];
779           idiag  -= 4;
780           i2     -= 2;
781         }
782         break;
783       case 3:
784         for (i=m-1; i>=0; i--) {
785           v  = aa + 9*ai[i];
786           vi = aj + ai[i];
787           nz = ai[i+1] - ai[i];
788           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789           while (nz--) {
790             idx = 3*(*vi++);
791             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793             v  += 9;
794           }
795           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797           idiag  -= 9;
798           i2     -= 3;
799         }
800         break;
801       case 4:
802         for (i=m-1; i>=0; i--) {
803           v  = aa + 16*ai[i];
804           vi = aj + ai[i];
805           nz = ai[i+1] - ai[i];
806           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807           while (nz--) {
808             idx = 4*(*vi++);
809             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811             v  += 16;
812           }
813           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815           idiag  -= 16;
816           i2     -= 4;
817         }
818         break;
819       case 5:
820         for (i=m-1; i>=0; i--) {
821           v  = aa + 25*ai[i];
822           vi = aj + ai[i];
823           nz = ai[i+1] - ai[i];
824           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825           while (nz--) {
826             idx = 5*(*vi++);
827             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829             v  += 25;
830           }
831           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833           idiag  -= 25;
834           i2     -= 5;
835         }
836         break;
837       case 6:
838         for (i=m-1; i>=0; i--) {
839           v  = aa + 36*ai[i];
840           vi = aj + ai[i];
841           nz = ai[i+1] - ai[i];
842           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843           while (nz--) {
844             idx = 6*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848             v  += 36;
849           }
850           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853           idiag  -= 36;
854           i2     -= 6;
855         }
856         break;
857       case 7:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + 49*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864           while (nz--) {
865             idx = 7*(*vi++);
866             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869             v  += 49;
870           }
871           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874           idiag  -= 49;
875           i2     -= 7;
876         }
877         break;
878       default:
879         for (i=m-1; i>=0; i--) {
880           v  = aa + bs2*ai[i];
881           vi = aj + ai[i];
882           nz = ai[i+1] - ai[i];
883 
884           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885           /* copy all rows of x that are needed into contiguous space */
886           workt = work;
887           for (j=0; j<nz; j++) {
888             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
889             workt += bs;
890           }
891           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
893 
894           idiag -= bs2;
895           i2    -= bs;
896         }
897         break;
898       }
899       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
900     }
901   }
902   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
903   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 
908 /*
909     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910 */
911 #if defined(PETSC_HAVE_FORTRAN_CAPS)
912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914 #define matsetvaluesblocked4_ matsetvaluesblocked4
915 #endif
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "matsetvaluesblocked4_"
919 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
920 {
921   Mat               A  = *AA;
922   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
923   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
924   PetscInt          *ai    =a->i,*ailen=a->ilen;
925   PetscInt          *aj    =a->j,stepval,lastcol = -1;
926   const PetscScalar *value = v;
927   MatScalar         *ap,*aa = a->a,*bap;
928 
929   PetscFunctionBegin;
930   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
931   stepval = (n-1)*4;
932   for (k=0; k<m; k++) { /* loop over added rows */
933     row  = im[k];
934     rp   = aj + ai[row];
935     ap   = aa + 16*ai[row];
936     nrow = ailen[row];
937     low  = 0;
938     high = nrow;
939     for (l=0; l<n; l++) { /* loop over added columns */
940       col = in[l];
941       if (col <= lastcol)  low = 0;
942       else                high = nrow;
943       lastcol = col;
944       value   = v + k*(stepval+4 + l)*4;
945       while (high-low > 7) {
946         t = (low+high)/2;
947         if (rp[t] > col) high = t;
948         else             low  = t;
949       }
950       for (i=low; i<high; i++) {
951         if (rp[i] > col) break;
952         if (rp[i] == col) {
953           bap = ap +  16*i;
954           for (ii=0; ii<4; ii++,value+=stepval) {
955             for (jj=ii; jj<16; jj+=4) {
956               bap[jj] += *value++;
957             }
958           }
959           goto noinsert2;
960         }
961       }
962       N = nrow++ - 1;
963       high++; /* added new column index thus must search to one higher than before */
964       /* shift up all the later entries in this row */
965       for (ii=N; ii>=i; ii--) {
966         rp[ii+1] = rp[ii];
967         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
968       }
969       if (N >= i) {
970         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
971       }
972       rp[i] = col;
973       bap   = ap +  16*i;
974       for (ii=0; ii<4; ii++,value+=stepval) {
975         for (jj=ii; jj<16; jj+=4) {
976           bap[jj] = *value++;
977         }
978       }
979       noinsert2:;
980       low = i;
981     }
982     ailen[row] = nrow;
983   }
984   PetscFunctionReturnVoid();
985 }
986 
987 #if defined(PETSC_HAVE_FORTRAN_CAPS)
988 #define matsetvalues4_ MATSETVALUES4
989 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
990 #define matsetvalues4_ matsetvalues4
991 #endif
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatSetValues4_"
995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
996 {
997   Mat         A  = *AA;
998   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
999   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1000   PetscInt    *ai=a->i,*ailen=a->ilen;
1001   PetscInt    *aj=a->j,brow,bcol;
1002   PetscInt    ridx,cidx,lastcol = -1;
1003   MatScalar   *ap,value,*aa=a->a,*bap;
1004 
1005   PetscFunctionBegin;
1006   for (k=0; k<m; k++) { /* loop over added rows */
1007     row  = im[k]; brow = row/4;
1008     rp   = aj + ai[brow];
1009     ap   = aa + 16*ai[brow];
1010     nrow = ailen[brow];
1011     low  = 0;
1012     high = nrow;
1013     for (l=0; l<n; l++) { /* loop over added columns */
1014       col   = in[l]; bcol = col/4;
1015       ridx  = row % 4; cidx = col % 4;
1016       value = v[l + k*n];
1017       if (col <= lastcol)  low = 0;
1018       else                high = nrow;
1019       lastcol = col;
1020       while (high-low > 7) {
1021         t = (low+high)/2;
1022         if (rp[t] > bcol) high = t;
1023         else              low  = t;
1024       }
1025       for (i=low; i<high; i++) {
1026         if (rp[i] > bcol) break;
1027         if (rp[i] == bcol) {
1028           bap   = ap +  16*i + 4*cidx + ridx;
1029           *bap += value;
1030           goto noinsert1;
1031         }
1032       }
1033       N = nrow++ - 1;
1034       high++; /* added new column thus must search to one higher than before */
1035       /* shift up all the later entries in this row */
1036       for (ii=N; ii>=i; ii--) {
1037         rp[ii+1] = rp[ii];
1038         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1039       }
1040       if (N>=i) {
1041         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1042       }
1043       rp[i]                    = bcol;
1044       ap[16*i + 4*cidx + ridx] = value;
1045 noinsert1:;
1046       low = i;
1047     }
1048     ailen[brow] = nrow;
1049   }
1050   PetscFunctionReturnVoid();
1051 }
1052 
1053 /*
1054      Checks for missing diagonals
1055 */
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1058 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1059 {
1060   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscInt       *diag,*ii = a->i,i;
1063 
1064   PetscFunctionBegin;
1065   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1066   *missing = PETSC_FALSE;
1067   if (A->rmap->n > 0 && !ii) {
1068     *missing = PETSC_TRUE;
1069     if (d) *d = 0;
1070     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1071   } else {
1072     diag = a->diag;
1073     for (i=0; i<a->mbs; i++) {
1074       if (diag[i] >= ii[i+1]) {
1075         *missing = PETSC_TRUE;
1076         if (d) *d = i;
1077         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1078         break;
1079       }
1080     }
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1087 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1088 {
1089   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1090   PetscErrorCode ierr;
1091   PetscInt       i,j,m = a->mbs;
1092 
1093   PetscFunctionBegin;
1094   if (!a->diag) {
1095     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1096     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1097     a->free_diag = PETSC_TRUE;
1098   }
1099   for (i=0; i<m; i++) {
1100     a->diag[i] = a->i[i+1];
1101     for (j=a->i[i]; j<a->i[i+1]; j++) {
1102       if (a->j[j] == i) {
1103         a->diag[i] = j;
1104         break;
1105       }
1106     }
1107   }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1114 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1115 {
1116   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1119   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1120 
1121   PetscFunctionBegin;
1122   *nn = n;
1123   if (!ia) PetscFunctionReturn(0);
1124   if (symmetric) {
1125     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1126     nz   = tia[n];
1127   } else {
1128     tia = a->i; tja = a->j;
1129   }
1130 
1131   if (!blockcompressed && bs > 1) {
1132     (*nn) *= bs;
1133     /* malloc & create the natural set of indices */
1134     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1135     if (n) {
1136       (*ia)[0] = oshift;
1137       for (j=1; j<bs; j++) {
1138         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1139       }
1140     }
1141 
1142     for (i=1; i<n; i++) {
1143       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1144       for (j=1; j<bs; j++) {
1145         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1146       }
1147     }
1148     if (n) {
1149       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1150     }
1151 
1152     if (inja) {
1153       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1154       cnt = 0;
1155       for (i=0; i<n; i++) {
1156         for (j=0; j<bs; j++) {
1157           for (k=tia[i]; k<tia[i+1]; k++) {
1158             for (l=0; l<bs; l++) {
1159               (*ja)[cnt++] = bs*tja[k] + l;
1160             }
1161           }
1162         }
1163       }
1164     }
1165 
1166     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1167       ierr = PetscFree(tia);CHKERRQ(ierr);
1168       ierr = PetscFree(tja);CHKERRQ(ierr);
1169     }
1170   } else if (oshift == 1) {
1171     if (symmetric) {
1172       nz = tia[A->rmap->n/bs];
1173       /*  add 1 to i and j indices */
1174       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1175       *ia = tia;
1176       if (ja) {
1177         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1178         *ja = tja;
1179       }
1180     } else {
1181       nz = a->i[A->rmap->n/bs];
1182       /* malloc space and  add 1 to i and j indices */
1183       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1184       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1185       if (ja) {
1186         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1187         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1188       }
1189     }
1190   } else {
1191     *ia = tia;
1192     if (ja) *ja = tja;
1193   }
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1199 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1200 {
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   if (!ia) PetscFunctionReturn(0);
1205   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1206     ierr = PetscFree(*ia);CHKERRQ(ierr);
1207     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1214 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220 #if defined(PETSC_USE_LOG)
1221   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1222 #endif
1223   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1224   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1225   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1226   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1227   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1228   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1229   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1230   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1231   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1232   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1233   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1234   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1235   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1236 
1237   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1238   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1239   ierr = PetscFree(A->data);CHKERRQ(ierr);
1240 
1241   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1257 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1258 {
1259   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   switch (op) {
1264   case MAT_ROW_ORIENTED:
1265     a->roworiented = flg;
1266     break;
1267   case MAT_KEEP_NONZERO_PATTERN:
1268     a->keepnonzeropattern = flg;
1269     break;
1270   case MAT_NEW_NONZERO_LOCATIONS:
1271     a->nonew = (flg ? 0 : 1);
1272     break;
1273   case MAT_NEW_NONZERO_LOCATION_ERR:
1274     a->nonew = (flg ? -1 : 0);
1275     break;
1276   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1277     a->nonew = (flg ? -2 : 0);
1278     break;
1279   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1280     a->nounused = (flg ? -1 : 0);
1281     break;
1282   case MAT_NEW_DIAGONALS:
1283   case MAT_IGNORE_OFF_PROC_ENTRIES:
1284   case MAT_USE_HASH_TABLE:
1285     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1286     break;
1287   case MAT_SPD:
1288   case MAT_SYMMETRIC:
1289   case MAT_STRUCTURALLY_SYMMETRIC:
1290   case MAT_HERMITIAN:
1291   case MAT_SYMMETRY_ETERNAL:
1292   case MAT_SUBMAT_SINGLEIS:
1293     /* These options are handled directly by MatSetOption() */
1294     break;
1295   default:
1296     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1304 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1305 {
1306   PetscErrorCode ierr;
1307   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1308   MatScalar      *aa_i;
1309   PetscScalar    *v_i;
1310 
1311   PetscFunctionBegin;
1312   bs  = A->rmap->bs;
1313   bs2 = bs*bs;
1314   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1315 
1316   bn  = row/bs;   /* Block number */
1317   bp  = row % bs; /* Block Position */
1318   M   = ai[bn+1] - ai[bn];
1319   *nz = bs*M;
1320 
1321   if (v) {
1322     *v = 0;
1323     if (*nz) {
1324       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1325       for (i=0; i<M; i++) { /* for each block in the block row */
1326         v_i  = *v + i*bs;
1327         aa_i = aa + bs2*(ai[bn] + i);
1328         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1329       }
1330     }
1331   }
1332 
1333   if (idx) {
1334     *idx = 0;
1335     if (*nz) {
1336       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1337       for (i=0; i<M; i++) { /* for each block in the block row */
1338         idx_i = *idx + i*bs;
1339         itmp  = bs*aj[ai[bn] + i];
1340         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1341       }
1342     }
1343   }
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1349 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1350 {
1351   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1352   PetscErrorCode ierr;
1353 
1354   PetscFunctionBegin;
1355   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1361 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1362 {
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1367   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1375 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1376 {
1377   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1378   Mat            C;
1379   PetscErrorCode ierr;
1380   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1381   PetscInt       *rows,*cols,bs2=a->bs2;
1382   MatScalar      *array;
1383 
1384   PetscFunctionBegin;
1385   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1386   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1387     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1388 
1389     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1390     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1391     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1392     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1393     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1394     ierr = PetscFree(col);CHKERRQ(ierr);
1395   } else {
1396     C = *B;
1397   }
1398 
1399   array = a->a;
1400   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1401   for (i=0; i<mbs; i++) {
1402     cols[0] = i*bs;
1403     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1404     len = ai[i+1] - ai[i];
1405     for (j=0; j<len; j++) {
1406       rows[0] = (*aj++)*bs;
1407       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1408       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1409       array += bs2;
1410     }
1411   }
1412   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1413 
1414   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1415   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1416 
1417   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1418     *B = C;
1419   } else {
1420     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1427 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1428 {
1429   PetscErrorCode ierr;
1430   Mat            Btrans;
1431 
1432   PetscFunctionBegin;
1433   *f   = PETSC_FALSE;
1434   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1435   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1436   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1442 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1443 {
1444   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1445   PetscErrorCode ierr;
1446   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1447   int            fd;
1448   PetscScalar    *aa;
1449   FILE           *file;
1450 
1451   PetscFunctionBegin;
1452   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1453   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1454   col_lens[0] = MAT_FILE_CLASSID;
1455 
1456   col_lens[1] = A->rmap->N;
1457   col_lens[2] = A->cmap->n;
1458   col_lens[3] = a->nz*bs2;
1459 
1460   /* store lengths of each row and write (including header) to file */
1461   count = 0;
1462   for (i=0; i<a->mbs; i++) {
1463     for (j=0; j<bs; j++) {
1464       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1465     }
1466   }
1467   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1468   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1469 
1470   /* store column indices (zero start index) */
1471   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1472   count = 0;
1473   for (i=0; i<a->mbs; i++) {
1474     for (j=0; j<bs; j++) {
1475       for (k=a->i[i]; k<a->i[i+1]; k++) {
1476         for (l=0; l<bs; l++) {
1477           jj[count++] = bs*a->j[k] + l;
1478         }
1479       }
1480     }
1481   }
1482   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1483   ierr = PetscFree(jj);CHKERRQ(ierr);
1484 
1485   /* store nonzero values */
1486   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1487   count = 0;
1488   for (i=0; i<a->mbs; i++) {
1489     for (j=0; j<bs; j++) {
1490       for (k=a->i[i]; k<a->i[i+1]; k++) {
1491         for (l=0; l<bs; l++) {
1492           aa[count++] = a->a[bs2*k + l*bs + j];
1493         }
1494       }
1495     }
1496   }
1497   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1498   ierr = PetscFree(aa);CHKERRQ(ierr);
1499 
1500   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1501   if (file) {
1502     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1503   }
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1509 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1510 {
1511   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1512   PetscErrorCode    ierr;
1513   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1514   PetscViewerFormat format;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1518   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1519     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1521     const char *matname;
1522     Mat        aij;
1523     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1524     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1525     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1526     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1527     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1528   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1529       PetscFunctionReturn(0);
1530   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1531     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1532     for (i=0; i<a->mbs; i++) {
1533       for (j=0; j<bs; j++) {
1534         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1535         for (k=a->i[i]; k<a->i[i+1]; k++) {
1536           for (l=0; l<bs; l++) {
1537 #if defined(PETSC_USE_COMPLEX)
1538             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1543                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1544             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1546             }
1547 #else
1548             if (a->a[bs2*k + l*bs + j] != 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1550             }
1551 #endif
1552           }
1553         }
1554         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1555       }
1556     }
1557     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1558   } else {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1571                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1577 #endif
1578           }
1579         }
1580         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1581       }
1582     }
1583     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1584   }
1585   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #include <petscdraw.h>
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1592 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1593 {
1594   Mat               A = (Mat) Aa;
1595   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1596   PetscErrorCode    ierr;
1597   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1598   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1599   MatScalar         *aa;
1600   PetscViewer       viewer;
1601   PetscViewerFormat format;
1602 
1603   PetscFunctionBegin;
1604   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1605   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1606   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1607 
1608   /* loop over matrix elements drawing boxes */
1609 
1610   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1611     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1612     /* Blue for negative, Cyan for zero and  Red for positive */
1613     color = PETSC_DRAW_BLUE;
1614     for (i=0,row=0; i<mbs; i++,row+=bs) {
1615       for (j=a->i[i]; j<a->i[i+1]; j++) {
1616         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1617         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1618         aa  = a->a + j*bs2;
1619         for (k=0; k<bs; k++) {
1620           for (l=0; l<bs; l++) {
1621             if (PetscRealPart(*aa++) >=  0.) continue;
1622             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1623           }
1624         }
1625       }
1626     }
1627     color = PETSC_DRAW_CYAN;
1628     for (i=0,row=0; i<mbs; i++,row+=bs) {
1629       for (j=a->i[i]; j<a->i[i+1]; j++) {
1630         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1631         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1632         aa  = a->a + j*bs2;
1633         for (k=0; k<bs; k++) {
1634           for (l=0; l<bs; l++) {
1635             if (PetscRealPart(*aa++) != 0.) continue;
1636             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1637           }
1638         }
1639       }
1640     }
1641     color = PETSC_DRAW_RED;
1642     for (i=0,row=0; i<mbs; i++,row+=bs) {
1643       for (j=a->i[i]; j<a->i[i+1]; j++) {
1644         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1645         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1646         aa  = a->a + j*bs2;
1647         for (k=0; k<bs; k++) {
1648           for (l=0; l<bs; l++) {
1649             if (PetscRealPart(*aa++) <= 0.) continue;
1650             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1651           }
1652         }
1653       }
1654     }
1655     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1656   } else {
1657     /* use contour shading to indicate magnitude of values */
1658     /* first determine max of all nonzero values */
1659     PetscReal minv = 0.0, maxv = 0.0;
1660     PetscDraw popup;
1661 
1662     for (i=0; i<a->nz*a->bs2; i++) {
1663       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1664     }
1665     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1666     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1667     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1668 
1669     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1670     for (i=0,row=0; i<mbs; i++,row+=bs) {
1671       for (j=a->i[i]; j<a->i[i+1]; j++) {
1672         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1673         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1674         aa  = a->a + j*bs2;
1675         for (k=0; k<bs; k++) {
1676           for (l=0; l<bs; l++) {
1677             MatScalar v = *aa++;
1678             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1679             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1680           }
1681         }
1682       }
1683     }
1684     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1685   }
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1691 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1692 {
1693   PetscErrorCode ierr;
1694   PetscReal      xl,yl,xr,yr,w,h;
1695   PetscDraw      draw;
1696   PetscBool      isnull;
1697 
1698   PetscFunctionBegin;
1699   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1700   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1701   if (isnull) PetscFunctionReturn(0);
1702 
1703   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1704   xr  += w;          yr += h;        xl = -w;     yl = -h;
1705   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1706   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1707   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1708   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1709   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatView_SeqBAIJ"
1715 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1716 {
1717   PetscErrorCode ierr;
1718   PetscBool      iascii,isbinary,isdraw;
1719 
1720   PetscFunctionBegin;
1721   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1722   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1723   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1724   if (iascii) {
1725     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1726   } else if (isbinary) {
1727     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1728   } else if (isdraw) {
1729     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1730   } else {
1731     Mat B;
1732     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1733     ierr = MatView(B,viewer);CHKERRQ(ierr);
1734     ierr = MatDestroy(&B);CHKERRQ(ierr);
1735   }
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1742 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1743 {
1744   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1745   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1746   PetscInt    *ai = a->i,*ailen = a->ilen;
1747   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1748   MatScalar   *ap,*aa = a->a;
1749 
1750   PetscFunctionBegin;
1751   for (k=0; k<m; k++) { /* loop over rows */
1752     row = im[k]; brow = row/bs;
1753     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1754     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1755     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1756     nrow = ailen[brow];
1757     for (l=0; l<n; l++) { /* loop over columns */
1758       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1759       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1760       col  = in[l];
1761       bcol = col/bs;
1762       cidx = col%bs;
1763       ridx = row%bs;
1764       high = nrow;
1765       low  = 0; /* assume unsorted */
1766       while (high-low > 5) {
1767         t = (low+high)/2;
1768         if (rp[t] > bcol) high = t;
1769         else             low  = t;
1770       }
1771       for (i=low; i<high; i++) {
1772         if (rp[i] > bcol) break;
1773         if (rp[i] == bcol) {
1774           *v++ = ap[bs2*i+bs*cidx+ridx];
1775           goto finished;
1776         }
1777       }
1778       *v++ = 0.0;
1779 finished:;
1780     }
1781   }
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1787 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1788 {
1789   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1790   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1791   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1792   PetscErrorCode    ierr;
1793   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1794   PetscBool         roworiented=a->roworiented;
1795   const PetscScalar *value     = v;
1796   MatScalar         *ap,*aa = a->a,*bap;
1797 
1798   PetscFunctionBegin;
1799   if (roworiented) {
1800     stepval = (n-1)*bs;
1801   } else {
1802     stepval = (m-1)*bs;
1803   }
1804   for (k=0; k<m; k++) { /* loop over added rows */
1805     row = im[k];
1806     if (row < 0) continue;
1807 #if defined(PETSC_USE_DEBUG)
1808     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1809 #endif
1810     rp   = aj + ai[row];
1811     ap   = aa + bs2*ai[row];
1812     rmax = imax[row];
1813     nrow = ailen[row];
1814     low  = 0;
1815     high = nrow;
1816     for (l=0; l<n; l++) { /* loop over added columns */
1817       if (in[l] < 0) continue;
1818 #if defined(PETSC_USE_DEBUG)
1819       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1820 #endif
1821       col = in[l];
1822       if (roworiented) {
1823         value = v + (k*(stepval+bs) + l)*bs;
1824       } else {
1825         value = v + (l*(stepval+bs) + k)*bs;
1826       }
1827       if (col <= lastcol) low = 0;
1828       else high = nrow;
1829       lastcol = col;
1830       while (high-low > 7) {
1831         t = (low+high)/2;
1832         if (rp[t] > col) high = t;
1833         else             low  = t;
1834       }
1835       for (i=low; i<high; i++) {
1836         if (rp[i] > col) break;
1837         if (rp[i] == col) {
1838           bap = ap +  bs2*i;
1839           if (roworiented) {
1840             if (is == ADD_VALUES) {
1841               for (ii=0; ii<bs; ii++,value+=stepval) {
1842                 for (jj=ii; jj<bs2; jj+=bs) {
1843                   bap[jj] += *value++;
1844                 }
1845               }
1846             } else {
1847               for (ii=0; ii<bs; ii++,value+=stepval) {
1848                 for (jj=ii; jj<bs2; jj+=bs) {
1849                   bap[jj] = *value++;
1850                 }
1851               }
1852             }
1853           } else {
1854             if (is == ADD_VALUES) {
1855               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1856                 for (jj=0; jj<bs; jj++) {
1857                   bap[jj] += value[jj];
1858                 }
1859                 bap += bs;
1860               }
1861             } else {
1862               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1863                 for (jj=0; jj<bs; jj++) {
1864                   bap[jj]  = value[jj];
1865                 }
1866                 bap += bs;
1867               }
1868             }
1869           }
1870           goto noinsert2;
1871         }
1872       }
1873       if (nonew == 1) goto noinsert2;
1874       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1875       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1876       N = nrow++ - 1; high++;
1877       /* shift up all the later entries in this row */
1878       for (ii=N; ii>=i; ii--) {
1879         rp[ii+1] = rp[ii];
1880         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1881       }
1882       if (N >= i) {
1883         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1884       }
1885       rp[i] = col;
1886       bap   = ap +  bs2*i;
1887       if (roworiented) {
1888         for (ii=0; ii<bs; ii++,value+=stepval) {
1889           for (jj=ii; jj<bs2; jj+=bs) {
1890             bap[jj] = *value++;
1891           }
1892         }
1893       } else {
1894         for (ii=0; ii<bs; ii++,value+=stepval) {
1895           for (jj=0; jj<bs; jj++) {
1896             *bap++ = *value++;
1897           }
1898         }
1899       }
1900 noinsert2:;
1901       low = i;
1902     }
1903     ailen[row] = nrow;
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1910 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1911 {
1912   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1913   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1914   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1915   PetscErrorCode ierr;
1916   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1917   MatScalar      *aa  = a->a,*ap;
1918   PetscReal      ratio=0.6;
1919 
1920   PetscFunctionBegin;
1921   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1922 
1923   if (m) rmax = ailen[0];
1924   for (i=1; i<mbs; i++) {
1925     /* move each row back by the amount of empty slots (fshift) before it*/
1926     fshift += imax[i-1] - ailen[i-1];
1927     rmax    = PetscMax(rmax,ailen[i]);
1928     if (fshift) {
1929       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1930       N  = ailen[i];
1931       for (j=0; j<N; j++) {
1932         ip[j-fshift] = ip[j];
1933 
1934         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1935       }
1936     }
1937     ai[i] = ai[i-1] + ailen[i-1];
1938   }
1939   if (mbs) {
1940     fshift += imax[mbs-1] - ailen[mbs-1];
1941     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1942   }
1943 
1944   /* reset ilen and imax for each row */
1945   a->nonzerorowcnt = 0;
1946   for (i=0; i<mbs; i++) {
1947     ailen[i] = imax[i] = ai[i+1] - ai[i];
1948     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1949   }
1950   a->nz = ai[mbs];
1951 
1952   /* diagonals may have moved, so kill the diagonal pointers */
1953   a->idiagvalid = PETSC_FALSE;
1954   if (fshift && a->diag) {
1955     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1956     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1957     a->diag = 0;
1958   }
1959   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1960   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1961   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1962   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1963 
1964   A->info.mallocs    += a->reallocs;
1965   a->reallocs         = 0;
1966   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1967   a->rmax             = rmax;
1968 
1969   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 /*
1974    This function returns an array of flags which indicate the locations of contiguous
1975    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1976    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1977    Assume: sizes should be long enough to hold all the values.
1978 */
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1981 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1982 {
1983   PetscInt  i,j,k,row;
1984   PetscBool flg;
1985 
1986   PetscFunctionBegin;
1987   for (i=0,j=0; i<n; j++) {
1988     row = idx[i];
1989     if (row%bs!=0) { /* Not the begining of a block */
1990       sizes[j] = 1;
1991       i++;
1992     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1993       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1994       i++;
1995     } else { /* Begining of the block, so check if the complete block exists */
1996       flg = PETSC_TRUE;
1997       for (k=1; k<bs; k++) {
1998         if (row+k != idx[i+k]) { /* break in the block */
1999           flg = PETSC_FALSE;
2000           break;
2001         }
2002       }
2003       if (flg) { /* No break in the bs */
2004         sizes[j] = bs;
2005         i       += bs;
2006       } else {
2007         sizes[j] = 1;
2008         i++;
2009       }
2010     }
2011   }
2012   *bs_max = j;
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2018 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2019 {
2020   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2021   PetscErrorCode    ierr;
2022   PetscInt          i,j,k,count,*rows;
2023   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2024   PetscScalar       zero = 0.0;
2025   MatScalar         *aa;
2026   const PetscScalar *xx;
2027   PetscScalar       *bb;
2028 
2029   PetscFunctionBegin;
2030   /* fix right hand side if needed */
2031   if (x && b) {
2032     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2033     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2034     for (i=0; i<is_n; i++) {
2035       bb[is_idx[i]] = diag*xx[is_idx[i]];
2036     }
2037     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2038     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2039   }
2040 
2041   /* Make a copy of the IS and  sort it */
2042   /* allocate memory for rows,sizes */
2043   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2044 
2045   /* copy IS values to rows, and sort them */
2046   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2047   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2048 
2049   if (baij->keepnonzeropattern) {
2050     for (i=0; i<is_n; i++) sizes[i] = 1;
2051     bs_max          = is_n;
2052   } else {
2053     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2054     A->nonzerostate++;
2055   }
2056 
2057   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2058     row = rows[j];
2059     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2060     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2061     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2062     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2063       if (diag != (PetscScalar)0.0) {
2064         if (baij->ilen[row/bs] > 0) {
2065           baij->ilen[row/bs]       = 1;
2066           baij->j[baij->i[row/bs]] = row/bs;
2067 
2068           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2069         }
2070         /* Now insert all the diagonal values for this bs */
2071         for (k=0; k<bs; k++) {
2072           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2073         }
2074       } else { /* (diag == 0.0) */
2075         baij->ilen[row/bs] = 0;
2076       } /* end (diag == 0.0) */
2077     } else { /* (sizes[i] != bs) */
2078 #if defined(PETSC_USE_DEBUG)
2079       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2080 #endif
2081       for (k=0; k<count; k++) {
2082         aa[0] =  zero;
2083         aa   += bs;
2084       }
2085       if (diag != (PetscScalar)0.0) {
2086         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2087       }
2088     }
2089   }
2090 
2091   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2092   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2098 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2099 {
2100   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2101   PetscErrorCode    ierr;
2102   PetscInt          i,j,k,count;
2103   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2104   PetscScalar       zero = 0.0;
2105   MatScalar         *aa;
2106   const PetscScalar *xx;
2107   PetscScalar       *bb;
2108   PetscBool         *zeroed,vecs = PETSC_FALSE;
2109 
2110   PetscFunctionBegin;
2111   /* fix right hand side if needed */
2112   if (x && b) {
2113     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2114     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2115     vecs = PETSC_TRUE;
2116   }
2117 
2118   /* zero the columns */
2119   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2120   for (i=0; i<is_n; i++) {
2121     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2122     zeroed[is_idx[i]] = PETSC_TRUE;
2123   }
2124   for (i=0; i<A->rmap->N; i++) {
2125     if (!zeroed[i]) {
2126       row = i/bs;
2127       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2128         for (k=0; k<bs; k++) {
2129           col = bs*baij->j[j] + k;
2130           if (zeroed[col]) {
2131             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2132             if (vecs) bb[i] -= aa[0]*xx[col];
2133             aa[0] = 0.0;
2134           }
2135         }
2136       }
2137     } else if (vecs) bb[i] = diag*xx[i];
2138   }
2139   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2140   if (vecs) {
2141     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2142     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2143   }
2144 
2145   /* zero the rows */
2146   for (i=0; i<is_n; i++) {
2147     row   = is_idx[i];
2148     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2149     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2150     for (k=0; k<count; k++) {
2151       aa[0] =  zero;
2152       aa   += bs;
2153     }
2154     if (diag != (PetscScalar)0.0) {
2155       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2156     }
2157   }
2158   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2164 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2165 {
2166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2167   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2168   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2169   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2170   PetscErrorCode ierr;
2171   PetscInt       ridx,cidx,bs2=a->bs2;
2172   PetscBool      roworiented=a->roworiented;
2173   MatScalar      *ap,value,*aa=a->a,*bap;
2174 
2175   PetscFunctionBegin;
2176   for (k=0; k<m; k++) { /* loop over added rows */
2177     row  = im[k];
2178     brow = row/bs;
2179     if (row < 0) continue;
2180 #if defined(PETSC_USE_DEBUG)
2181     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2182 #endif
2183     rp   = aj + ai[brow];
2184     ap   = aa + bs2*ai[brow];
2185     rmax = imax[brow];
2186     nrow = ailen[brow];
2187     low  = 0;
2188     high = nrow;
2189     for (l=0; l<n; l++) { /* loop over added columns */
2190       if (in[l] < 0) continue;
2191 #if defined(PETSC_USE_DEBUG)
2192       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2193 #endif
2194       col  = in[l]; bcol = col/bs;
2195       ridx = row % bs; cidx = col % bs;
2196       if (roworiented) {
2197         value = v[l + k*n];
2198       } else {
2199         value = v[k + l*m];
2200       }
2201       if (col <= lastcol) low = 0; else high = nrow;
2202       lastcol = col;
2203       while (high-low > 7) {
2204         t = (low+high)/2;
2205         if (rp[t] > bcol) high = t;
2206         else              low  = t;
2207       }
2208       for (i=low; i<high; i++) {
2209         if (rp[i] > bcol) break;
2210         if (rp[i] == bcol) {
2211           bap = ap +  bs2*i + bs*cidx + ridx;
2212           if (is == ADD_VALUES) *bap += value;
2213           else                  *bap  = value;
2214           goto noinsert1;
2215         }
2216       }
2217       if (nonew == 1) goto noinsert1;
2218       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2219       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2220       N = nrow++ - 1; high++;
2221       /* shift up all the later entries in this row */
2222       for (ii=N; ii>=i; ii--) {
2223         rp[ii+1] = rp[ii];
2224         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2225       }
2226       if (N>=i) {
2227         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2228       }
2229       rp[i]                      = bcol;
2230       ap[bs2*i + bs*cidx + ridx] = value;
2231       a->nz++;
2232       A->nonzerostate++;
2233 noinsert1:;
2234       low = i;
2235     }
2236     ailen[brow] = nrow;
2237   }
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2243 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2244 {
2245   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2246   Mat            outA;
2247   PetscErrorCode ierr;
2248   PetscBool      row_identity,col_identity;
2249 
2250   PetscFunctionBegin;
2251   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2252   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2253   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2254   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2255 
2256   outA            = inA;
2257   inA->factortype = MAT_FACTOR_LU;
2258   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2259   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2260 
2261   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2262 
2263   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2264   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2265   a->row = row;
2266   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2267   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2268   a->col = col;
2269 
2270   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2271   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2272   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2273   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2274 
2275   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2276   if (!a->solve_work) {
2277     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2278     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2279   }
2280   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2286 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2287 {
2288   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2289   PetscInt    i,nz,mbs;
2290 
2291   PetscFunctionBegin;
2292   nz  = baij->maxnz;
2293   mbs = baij->mbs;
2294   for (i=0; i<nz; i++) {
2295     baij->j[i] = indices[i];
2296   }
2297   baij->nz = nz;
2298   for (i=0; i<mbs; i++) {
2299     baij->ilen[i] = baij->imax[i];
2300   }
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2306 /*@
2307     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2308        in the matrix.
2309 
2310   Input Parameters:
2311 +  mat - the SeqBAIJ matrix
2312 -  indices - the column indices
2313 
2314   Level: advanced
2315 
2316   Notes:
2317     This can be called if you have precomputed the nonzero structure of the
2318   matrix and want to provide it to the matrix object to improve the performance
2319   of the MatSetValues() operation.
2320 
2321     You MUST have set the correct numbers of nonzeros per row in the call to
2322   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2323 
2324     MUST be called before any calls to MatSetValues();
2325 
2326 @*/
2327 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2328 {
2329   PetscErrorCode ierr;
2330 
2331   PetscFunctionBegin;
2332   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2333   PetscValidPointer(indices,2);
2334   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2340 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2341 {
2342   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2345   PetscReal      atmp;
2346   PetscScalar    *x,zero = 0.0;
2347   MatScalar      *aa;
2348   PetscInt       ncols,brow,krow,kcol;
2349 
2350   PetscFunctionBegin;
2351   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2352   bs  = A->rmap->bs;
2353   aa  = a->a;
2354   ai  = a->i;
2355   aj  = a->j;
2356   mbs = a->mbs;
2357 
2358   ierr = VecSet(v,zero);CHKERRQ(ierr);
2359   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2360   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2361   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2362   for (i=0; i<mbs; i++) {
2363     ncols = ai[1] - ai[0]; ai++;
2364     brow  = bs*i;
2365     for (j=0; j<ncols; j++) {
2366       for (kcol=0; kcol<bs; kcol++) {
2367         for (krow=0; krow<bs; krow++) {
2368           atmp = PetscAbsScalar(*aa);aa++;
2369           row  = brow + krow;   /* row index */
2370           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2371         }
2372       }
2373       aj++;
2374     }
2375   }
2376   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatCopy_SeqBAIJ"
2382 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2383 {
2384   PetscErrorCode ierr;
2385 
2386   PetscFunctionBegin;
2387   /* If the two matrices have the same copy implementation, use fast copy. */
2388   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2389     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2390     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2391     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2392 
2393     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2394     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2395     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2396   } else {
2397     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2398   }
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2404 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2405 {
2406   PetscErrorCode ierr;
2407 
2408   PetscFunctionBegin;
2409   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2415 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2416 {
2417   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2418 
2419   PetscFunctionBegin;
2420   *array = a->a;
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2426 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2427 {
2428   PetscFunctionBegin;
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2434 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2435 {
2436   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2437   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2438   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2439   PetscErrorCode ierr;
2440 
2441   PetscFunctionBegin;
2442   /* Set the number of nonzeros in the new matrix */
2443   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2449 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2450 {
2451   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2452   PetscErrorCode ierr;
2453   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2454   PetscBLASInt   one=1;
2455 
2456   PetscFunctionBegin;
2457   if (str == SAME_NONZERO_PATTERN) {
2458     PetscScalar  alpha = a;
2459     PetscBLASInt bnz;
2460     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2461     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2462     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2463   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2464     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2465   } else {
2466     Mat      B;
2467     PetscInt *nnz;
2468     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2469     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2470     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2471     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2472     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2473     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2474     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2475     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2476     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2477     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2478     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2479     ierr = PetscFree(nnz);CHKERRQ(ierr);
2480   }
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2486 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2487 {
2488   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2489   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2490   MatScalar   *aa = a->a;
2491 
2492   PetscFunctionBegin;
2493   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 #undef __FUNCT__
2498 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2499 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2500 {
2501   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2502   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2503   MatScalar   *aa = a->a;
2504 
2505   PetscFunctionBegin;
2506   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2512 /*
2513     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2514 */
2515 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2516 {
2517   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2518   PetscErrorCode ierr;
2519   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2520   PetscInt       nz = a->i[m],row,*jj,mr,col;
2521 
2522   PetscFunctionBegin;
2523   *nn = n;
2524   if (!ia) PetscFunctionReturn(0);
2525   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2526   else {
2527     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2528     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2529     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2530     jj   = a->j;
2531     for (i=0; i<nz; i++) {
2532       collengths[jj[i]]++;
2533     }
2534     cia[0] = oshift;
2535     for (i=0; i<n; i++) {
2536       cia[i+1] = cia[i] + collengths[i];
2537     }
2538     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2539     jj   = a->j;
2540     for (row=0; row<m; row++) {
2541       mr = a->i[row+1] - a->i[row];
2542       for (i=0; i<mr; i++) {
2543         col = *jj++;
2544 
2545         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2546       }
2547     }
2548     ierr = PetscFree(collengths);CHKERRQ(ierr);
2549     *ia  = cia; *ja = cja;
2550   }
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2556 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2557 {
2558   PetscErrorCode ierr;
2559 
2560   PetscFunctionBegin;
2561   if (!ia) PetscFunctionReturn(0);
2562   ierr = PetscFree(*ia);CHKERRQ(ierr);
2563   ierr = PetscFree(*ja);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 /*
2568  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2569  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2570  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2571  */
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2574 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2575 {
2576   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2577   PetscErrorCode ierr;
2578   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2579   PetscInt       nz = a->i[m],row,*jj,mr,col;
2580   PetscInt       *cspidx;
2581 
2582   PetscFunctionBegin;
2583   *nn = n;
2584   if (!ia) PetscFunctionReturn(0);
2585 
2586   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2587   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2588   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2589   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2590   jj   = a->j;
2591   for (i=0; i<nz; i++) {
2592     collengths[jj[i]]++;
2593   }
2594   cia[0] = oshift;
2595   for (i=0; i<n; i++) {
2596     cia[i+1] = cia[i] + collengths[i];
2597   }
2598   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2599   jj   = a->j;
2600   for (row=0; row<m; row++) {
2601     mr = a->i[row+1] - a->i[row];
2602     for (i=0; i<mr; i++) {
2603       col = *jj++;
2604       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2605       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2606     }
2607   }
2608   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2609   *ia    = cia; *ja = cja;
2610   *spidx = cspidx;
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2616 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2617 {
2618   PetscErrorCode ierr;
2619 
2620   PetscFunctionBegin;
2621   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2622   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2623   PetscFunctionReturn(0);
2624 }
2625 
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatShift_SeqBAIJ"
2628 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2629 {
2630   PetscErrorCode ierr;
2631   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2632 
2633   PetscFunctionBegin;
2634   if (!Y->preallocated || !aij->nz) {
2635     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2636   }
2637   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2638   PetscFunctionReturn(0);
2639 }
2640 
2641 /* -------------------------------------------------------------------*/
2642 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2643                                        MatGetRow_SeqBAIJ,
2644                                        MatRestoreRow_SeqBAIJ,
2645                                        MatMult_SeqBAIJ_N,
2646                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2647                                        MatMultTranspose_SeqBAIJ,
2648                                        MatMultTransposeAdd_SeqBAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                /* 10*/ 0,
2653                                        MatLUFactor_SeqBAIJ,
2654                                        0,
2655                                        0,
2656                                        MatTranspose_SeqBAIJ,
2657                                /* 15*/ MatGetInfo_SeqBAIJ,
2658                                        MatEqual_SeqBAIJ,
2659                                        MatGetDiagonal_SeqBAIJ,
2660                                        MatDiagonalScale_SeqBAIJ,
2661                                        MatNorm_SeqBAIJ,
2662                                /* 20*/ 0,
2663                                        MatAssemblyEnd_SeqBAIJ,
2664                                        MatSetOption_SeqBAIJ,
2665                                        MatZeroEntries_SeqBAIJ,
2666                                /* 24*/ MatZeroRows_SeqBAIJ,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                /* 29*/ MatSetUp_SeqBAIJ,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 34*/ MatDuplicate_SeqBAIJ,
2677                                        0,
2678                                        0,
2679                                        MatILUFactor_SeqBAIJ,
2680                                        0,
2681                                /* 39*/ MatAXPY_SeqBAIJ,
2682                                        MatGetSubMatrices_SeqBAIJ,
2683                                        MatIncreaseOverlap_SeqBAIJ,
2684                                        MatGetValues_SeqBAIJ,
2685                                        MatCopy_SeqBAIJ,
2686                                /* 44*/ 0,
2687                                        MatScale_SeqBAIJ,
2688                                        MatShift_SeqBAIJ,
2689                                        0,
2690                                        MatZeroRowsColumns_SeqBAIJ,
2691                                /* 49*/ 0,
2692                                        MatGetRowIJ_SeqBAIJ,
2693                                        MatRestoreRowIJ_SeqBAIJ,
2694                                        MatGetColumnIJ_SeqBAIJ,
2695                                        MatRestoreColumnIJ_SeqBAIJ,
2696                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        MatSetValuesBlocked_SeqBAIJ,
2701                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2702                                        MatDestroy_SeqBAIJ,
2703                                        MatView_SeqBAIJ,
2704                                        0,
2705                                        0,
2706                                /* 64*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2712                                        0,
2713                                        MatConvert_Basic,
2714                                        0,
2715                                        0,
2716                                /* 74*/ 0,
2717                                        MatFDColoringApply_BAIJ,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                /* 79*/ 0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                        MatLoad_SeqBAIJ,
2726                                /* 84*/ 0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                /* 89*/ 0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                /* 94*/ 0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                /* 99*/ 0,
2742                                        0,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                /*104*/ 0,
2747                                        MatRealPart_SeqBAIJ,
2748                                        MatImaginaryPart_SeqBAIJ,
2749                                        0,
2750                                        0,
2751                                /*109*/ 0,
2752                                        0,
2753                                        0,
2754                                        0,
2755                                        MatMissingDiagonal_SeqBAIJ,
2756                                /*114*/ 0,
2757                                        0,
2758                                        0,
2759                                        0,
2760                                        0,
2761                                /*119*/ 0,
2762                                        0,
2763                                        MatMultHermitianTranspose_SeqBAIJ,
2764                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2765                                        0,
2766                                /*124*/ 0,
2767                                        0,
2768                                        MatInvertBlockDiagonal_SeqBAIJ,
2769                                        0,
2770                                        0,
2771                                /*129*/ 0,
2772                                        0,
2773                                        0,
2774                                        0,
2775                                        0,
2776                                /*134*/ 0,
2777                                        0,
2778                                        0,
2779                                        0,
2780                                        0,
2781                                /*139*/ 0,
2782                                        0,
2783                                        0,
2784                                        MatFDColoringSetUp_SeqXAIJ,
2785                                        0,
2786                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2787 };
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2791 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2792 {
2793   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2794   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2795   PetscErrorCode ierr;
2796 
2797   PetscFunctionBegin;
2798   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2799 
2800   /* allocate space for values if not already there */
2801   if (!aij->saved_values) {
2802     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2803     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2804   }
2805 
2806   /* copy values over */
2807   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 #undef __FUNCT__
2812 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2813 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2814 {
2815   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2816   PetscErrorCode ierr;
2817   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2818 
2819   PetscFunctionBegin;
2820   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2821   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2822 
2823   /* copy values over */
2824   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2829 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2830 
2831 #undef __FUNCT__
2832 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2833 static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2834 {
2835   Mat_SeqBAIJ    *b;
2836   PetscErrorCode ierr;
2837   PetscInt       i,mbs,nbs,bs2;
2838   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2839 
2840   PetscFunctionBegin;
2841   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2842   if (nz == MAT_SKIP_ALLOCATION) {
2843     skipallocation = PETSC_TRUE;
2844     nz             = 0;
2845   }
2846 
2847   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2848   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2849   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2850   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2851 
2852   B->preallocated = PETSC_TRUE;
2853 
2854   mbs = B->rmap->n/bs;
2855   nbs = B->cmap->n/bs;
2856   bs2 = bs*bs;
2857 
2858   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2859 
2860   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2861   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2862   if (nnz) {
2863     for (i=0; i<mbs; i++) {
2864       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2865       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2866     }
2867   }
2868 
2869   b    = (Mat_SeqBAIJ*)B->data;
2870   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2871   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2872   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2873 
2874   if (!flg) {
2875     switch (bs) {
2876     case 1:
2877       B->ops->mult    = MatMult_SeqBAIJ_1;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2879       break;
2880     case 2:
2881       B->ops->mult    = MatMult_SeqBAIJ_2;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2883       break;
2884     case 3:
2885       B->ops->mult    = MatMult_SeqBAIJ_3;
2886       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2887       break;
2888     case 4:
2889       B->ops->mult    = MatMult_SeqBAIJ_4;
2890       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2891       break;
2892     case 5:
2893       B->ops->mult    = MatMult_SeqBAIJ_5;
2894       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2895       break;
2896     case 6:
2897       B->ops->mult    = MatMult_SeqBAIJ_6;
2898       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2899       break;
2900     case 7:
2901       B->ops->mult    = MatMult_SeqBAIJ_7;
2902       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2903       break;
2904     case 15:
2905       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2906       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2907       break;
2908     default:
2909       B->ops->mult    = MatMult_SeqBAIJ_N;
2910       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2911       break;
2912     }
2913   }
2914   B->ops->sor = MatSOR_SeqBAIJ;
2915   b->mbs = mbs;
2916   b->nbs = nbs;
2917   if (!skipallocation) {
2918     if (!b->imax) {
2919       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2920       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2921 
2922       b->free_imax_ilen = PETSC_TRUE;
2923     }
2924     /* b->ilen will count nonzeros in each block row so far. */
2925     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2926     if (!nnz) {
2927       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2928       else if (nz < 0) nz = 1;
2929       for (i=0; i<mbs; i++) b->imax[i] = nz;
2930       nz = nz*mbs;
2931     } else {
2932       nz = 0;
2933       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2934     }
2935 
2936     /* allocate the matrix space */
2937     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2938     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2939     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2940     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2941     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2942 
2943     b->singlemalloc = PETSC_TRUE;
2944     b->i[0]         = 0;
2945     for (i=1; i<mbs+1; i++) {
2946       b->i[i] = b->i[i-1] + b->imax[i-1];
2947     }
2948     b->free_a  = PETSC_TRUE;
2949     b->free_ij = PETSC_TRUE;
2950   } else {
2951     b->free_a  = PETSC_FALSE;
2952     b->free_ij = PETSC_FALSE;
2953   }
2954 
2955   b->bs2              = bs2;
2956   b->mbs              = mbs;
2957   b->nz               = 0;
2958   b->maxnz            = nz;
2959   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2960   B->was_assembled    = PETSC_FALSE;
2961   B->assembled        = PETSC_FALSE;
2962   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2968 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2969 {
2970   PetscInt       i,m,nz,nz_max=0,*nnz;
2971   PetscScalar    *values=0;
2972   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2977   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2978   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2979   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2980   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2981   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2982   m    = B->rmap->n/bs;
2983 
2984   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2985   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2986   for (i=0; i<m; i++) {
2987     nz = ii[i+1]- ii[i];
2988     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2989     nz_max = PetscMax(nz_max, nz);
2990     nnz[i] = nz;
2991   }
2992   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2993   ierr = PetscFree(nnz);CHKERRQ(ierr);
2994 
2995   values = (PetscScalar*)V;
2996   if (!values) {
2997     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2998   }
2999   for (i=0; i<m; i++) {
3000     PetscInt          ncols  = ii[i+1] - ii[i];
3001     const PetscInt    *icols = jj + ii[i];
3002     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3003     if (!roworiented) {
3004       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3005     } else {
3006       PetscInt j;
3007       for (j=0; j<ncols; j++) {
3008         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3009         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3010       }
3011     }
3012   }
3013   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3014   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3015   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3016   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 /*MC
3021    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3022    block sparse compressed row format.
3023 
3024    Options Database Keys:
3025 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3026 
3027   Level: beginner
3028 
3029 .seealso: MatCreateSeqBAIJ()
3030 M*/
3031 
3032 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3033 
3034 #undef __FUNCT__
3035 #define __FUNCT__ "MatCreate_SeqBAIJ"
3036 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3037 {
3038   PetscErrorCode ierr;
3039   PetscMPIInt    size;
3040   Mat_SeqBAIJ    *b;
3041 
3042   PetscFunctionBegin;
3043   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3044   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3045 
3046   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3047   B->data = (void*)b;
3048   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3049 
3050   b->row          = 0;
3051   b->col          = 0;
3052   b->icol         = 0;
3053   b->reallocs     = 0;
3054   b->saved_values = 0;
3055 
3056   b->roworiented        = PETSC_TRUE;
3057   b->nonew              = 0;
3058   b->diag               = 0;
3059   B->spptr              = 0;
3060   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3061   b->keepnonzeropattern = PETSC_FALSE;
3062 
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3072   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 #undef __FUNCT__
3077 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3078 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3079 {
3080   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3081   PetscErrorCode ierr;
3082   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3083 
3084   PetscFunctionBegin;
3085   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3086 
3087   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3088     c->imax           = a->imax;
3089     c->ilen           = a->ilen;
3090     c->free_imax_ilen = PETSC_FALSE;
3091   } else {
3092     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3093     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3094     for (i=0; i<mbs; i++) {
3095       c->imax[i] = a->imax[i];
3096       c->ilen[i] = a->ilen[i];
3097     }
3098     c->free_imax_ilen = PETSC_TRUE;
3099   }
3100 
3101   /* allocate the matrix space */
3102   if (mallocmatspace) {
3103     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3104       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3105       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3106 
3107       c->i            = a->i;
3108       c->j            = a->j;
3109       c->singlemalloc = PETSC_FALSE;
3110       c->free_a       = PETSC_TRUE;
3111       c->free_ij      = PETSC_FALSE;
3112       c->parent       = A;
3113       C->preallocated = PETSC_TRUE;
3114       C->assembled    = PETSC_TRUE;
3115 
3116       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3117       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3118       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3119     } else {
3120       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3121       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3122 
3123       c->singlemalloc = PETSC_TRUE;
3124       c->free_a       = PETSC_TRUE;
3125       c->free_ij      = PETSC_TRUE;
3126 
3127       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3128       if (mbs > 0) {
3129         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3130         if (cpvalues == MAT_COPY_VALUES) {
3131           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3132         } else {
3133           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3134         }
3135       }
3136       C->preallocated = PETSC_TRUE;
3137       C->assembled    = PETSC_TRUE;
3138     }
3139   }
3140 
3141   c->roworiented = a->roworiented;
3142   c->nonew       = a->nonew;
3143 
3144   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3145   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3146 
3147   c->bs2         = a->bs2;
3148   c->mbs         = a->mbs;
3149   c->nbs         = a->nbs;
3150 
3151   if (a->diag) {
3152     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3153       c->diag      = a->diag;
3154       c->free_diag = PETSC_FALSE;
3155     } else {
3156       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3157       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3158       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3159       c->free_diag = PETSC_TRUE;
3160     }
3161   } else c->diag = 0;
3162 
3163   c->nz         = a->nz;
3164   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3165   c->solve_work = NULL;
3166   c->mult_work  = NULL;
3167   c->sor_workt  = NULL;
3168   c->sor_work   = NULL;
3169 
3170   c->compressedrow.use   = a->compressedrow.use;
3171   c->compressedrow.nrows = a->compressedrow.nrows;
3172   if (a->compressedrow.use) {
3173     i    = a->compressedrow.nrows;
3174     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3175     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3176     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3177     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3178   } else {
3179     c->compressedrow.use    = PETSC_FALSE;
3180     c->compressedrow.i      = NULL;
3181     c->compressedrow.rindex = NULL;
3182   }
3183   C->nonzerostate = A->nonzerostate;
3184 
3185   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3186   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3187   PetscFunctionReturn(0);
3188 }
3189 
3190 #undef __FUNCT__
3191 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3192 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3193 {
3194   PetscErrorCode ierr;
3195 
3196   PetscFunctionBegin;
3197   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3198   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3199   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3200   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatLoad_SeqBAIJ"
3206 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3207 {
3208   Mat_SeqBAIJ    *a;
3209   PetscErrorCode ierr;
3210   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3211   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3212   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3213   PetscInt       *masked,nmask,tmp,bs2,ishift;
3214   PetscMPIInt    size;
3215   int            fd;
3216   PetscScalar    *aa;
3217   MPI_Comm       comm;
3218 
3219   PetscFunctionBegin;
3220   /* force binary viewer to load .info file if it has not yet done so */
3221   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3222   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3223   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3224   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3225   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3226   if (bs < 0) bs = 1;
3227   bs2  = bs*bs;
3228 
3229   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3230   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3231   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3232   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3233   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3234   M = header[1]; N = header[2]; nz = header[3];
3235 
3236   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3237   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3238 
3239   /*
3240      This code adds extra rows to make sure the number of rows is
3241     divisible by the blocksize
3242   */
3243   mbs        = M/bs;
3244   extra_rows = bs - M + bs*(mbs);
3245   if (extra_rows == bs) extra_rows = 0;
3246   else mbs++;
3247   if (extra_rows) {
3248     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3249   }
3250 
3251   /* Set global sizes if not already set */
3252   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3253     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3254   } else { /* Check if the matrix global sizes are correct */
3255     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3256     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3257       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3258     }
3259     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3260   }
3261 
3262   /* read in row lengths */
3263   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3264   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3265   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3266 
3267   /* read in column indices */
3268   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3269   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3270   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3271 
3272   /* loop over row lengths determining block row lengths */
3273   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3274   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3275   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3276   rowcount = 0;
3277   nzcount  = 0;
3278   for (i=0; i<mbs; i++) {
3279     nmask = 0;
3280     for (j=0; j<bs; j++) {
3281       kmax = rowlengths[rowcount];
3282       for (k=0; k<kmax; k++) {
3283         tmp = jj[nzcount++]/bs;
3284         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3285       }
3286       rowcount++;
3287     }
3288     browlengths[i] += nmask;
3289     /* zero out the mask elements we set */
3290     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3291   }
3292 
3293   /* Do preallocation  */
3294   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3295   a    = (Mat_SeqBAIJ*)newmat->data;
3296 
3297   /* set matrix "i" values */
3298   a->i[0] = 0;
3299   for (i=1; i<= mbs; i++) {
3300     a->i[i]      = a->i[i-1] + browlengths[i-1];
3301     a->ilen[i-1] = browlengths[i-1];
3302   }
3303   a->nz = 0;
3304   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3305 
3306   /* read in nonzero values */
3307   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3308   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3309   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3310 
3311   /* set "a" and "j" values into matrix */
3312   nzcount = 0; jcount = 0;
3313   for (i=0; i<mbs; i++) {
3314     nzcountb = nzcount;
3315     nmask    = 0;
3316     for (j=0; j<bs; j++) {
3317       kmax = rowlengths[i*bs+j];
3318       for (k=0; k<kmax; k++) {
3319         tmp = jj[nzcount++]/bs;
3320         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3321       }
3322     }
3323     /* sort the masked values */
3324     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3325 
3326     /* set "j" values into matrix */
3327     maskcount = 1;
3328     for (j=0; j<nmask; j++) {
3329       a->j[jcount++]  = masked[j];
3330       mask[masked[j]] = maskcount++;
3331     }
3332     /* set "a" values into matrix */
3333     ishift = bs2*a->i[i];
3334     for (j=0; j<bs; j++) {
3335       kmax = rowlengths[i*bs+j];
3336       for (k=0; k<kmax; k++) {
3337         tmp       = jj[nzcountb]/bs;
3338         block     = mask[tmp] - 1;
3339         point     = jj[nzcountb] - bs*tmp;
3340         idx       = ishift + bs2*block + j + bs*point;
3341         a->a[idx] = (MatScalar)aa[nzcountb++];
3342       }
3343     }
3344     /* zero out the mask elements we set */
3345     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3346   }
3347   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3348 
3349   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3350   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3351   ierr = PetscFree(aa);CHKERRQ(ierr);
3352   ierr = PetscFree(jj);CHKERRQ(ierr);
3353   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3354 
3355   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3356   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3357   PetscFunctionReturn(0);
3358 }
3359 
3360 #undef __FUNCT__
3361 #define __FUNCT__ "MatCreateSeqBAIJ"
3362 /*@C
3363    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3364    compressed row) format.  For good matrix assembly performance the
3365    user should preallocate the matrix storage by setting the parameter nz
3366    (or the array nnz).  By setting these parameters accurately, performance
3367    during matrix assembly can be increased by more than a factor of 50.
3368 
3369    Collective on MPI_Comm
3370 
3371    Input Parameters:
3372 +  comm - MPI communicator, set to PETSC_COMM_SELF
3373 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3374           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3375 .  m - number of rows
3376 .  n - number of columns
3377 .  nz - number of nonzero blocks  per block row (same for all rows)
3378 -  nnz - array containing the number of nonzero blocks in the various block rows
3379          (possibly different for each block row) or NULL
3380 
3381    Output Parameter:
3382 .  A - the matrix
3383 
3384    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3385    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3386    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3387 
3388    Options Database Keys:
3389 .   -mat_no_unroll - uses code that does not unroll the loops in the
3390                      block calculations (much slower)
3391 .    -mat_block_size - size of the blocks to use
3392 
3393    Level: intermediate
3394 
3395    Notes:
3396    The number of rows and columns must be divisible by blocksize.
3397 
3398    If the nnz parameter is given then the nz parameter is ignored
3399 
3400    A nonzero block is any block that as 1 or more nonzeros in it
3401 
3402    The block AIJ format is fully compatible with standard Fortran 77
3403    storage.  That is, the stored row and column indices can begin at
3404    either one (as in Fortran) or zero.  See the users' manual for details.
3405 
3406    Specify the preallocated storage with either nz or nnz (not both).
3407    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3408    allocation.  See Users-Manual: ch_mat for details.
3409    matrices.
3410 
3411 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3412 @*/
3413 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3414 {
3415   PetscErrorCode ierr;
3416 
3417   PetscFunctionBegin;
3418   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3419   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3420   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3421   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3422   PetscFunctionReturn(0);
3423 }
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3427 /*@C
3428    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3429    per row in the matrix. For good matrix assembly performance the
3430    user should preallocate the matrix storage by setting the parameter nz
3431    (or the array nnz).  By setting these parameters accurately, performance
3432    during matrix assembly can be increased by more than a factor of 50.
3433 
3434    Collective on MPI_Comm
3435 
3436    Input Parameters:
3437 +  B - the matrix
3438 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3439           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3440 .  nz - number of block nonzeros per block row (same for all rows)
3441 -  nnz - array containing the number of block nonzeros in the various block rows
3442          (possibly different for each block row) or NULL
3443 
3444    Options Database Keys:
3445 .   -mat_no_unroll - uses code that does not unroll the loops in the
3446                      block calculations (much slower)
3447 .   -mat_block_size - size of the blocks to use
3448 
3449    Level: intermediate
3450 
3451    Notes:
3452    If the nnz parameter is given then the nz parameter is ignored
3453 
3454    You can call MatGetInfo() to get information on how effective the preallocation was;
3455    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3456    You can also run with the option -info and look for messages with the string
3457    malloc in them to see if additional memory allocation was needed.
3458 
3459    The block AIJ format is fully compatible with standard Fortran 77
3460    storage.  That is, the stored row and column indices can begin at
3461    either one (as in Fortran) or zero.  See the users' manual for details.
3462 
3463    Specify the preallocated storage with either nz or nnz (not both).
3464    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3465    allocation.  See Users-Manual: ch_mat for details.
3466 
3467 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3468 @*/
3469 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3470 {
3471   PetscErrorCode ierr;
3472 
3473   PetscFunctionBegin;
3474   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3475   PetscValidType(B,1);
3476   PetscValidLogicalCollectiveInt(B,bs,2);
3477   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 #undef __FUNCT__
3482 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3483 /*@C
3484    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3485    (the default sequential PETSc format).
3486 
3487    Collective on MPI_Comm
3488 
3489    Input Parameters:
3490 +  B - the matrix
3491 .  i - the indices into j for the start of each local row (starts with zero)
3492 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3493 -  v - optional values in the matrix
3494 
3495    Level: developer
3496 
3497    Notes:
3498    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3499    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3500    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3501    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3502    block column and the second index is over columns within a block.
3503 
3504 .keywords: matrix, aij, compressed row, sparse
3505 
3506 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3507 @*/
3508 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3509 {
3510   PetscErrorCode ierr;
3511 
3512   PetscFunctionBegin;
3513   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3514   PetscValidType(B,1);
3515   PetscValidLogicalCollectiveInt(B,bs,2);
3516   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3517   PetscFunctionReturn(0);
3518 }
3519 
3520 
3521 #undef __FUNCT__
3522 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3523 /*@
3524      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3525 
3526      Collective on MPI_Comm
3527 
3528    Input Parameters:
3529 +  comm - must be an MPI communicator of size 1
3530 .  bs - size of block
3531 .  m - number of rows
3532 .  n - number of columns
3533 .  i - row indices
3534 .  j - column indices
3535 -  a - matrix values
3536 
3537    Output Parameter:
3538 .  mat - the matrix
3539 
3540    Level: advanced
3541 
3542    Notes:
3543        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3544     once the matrix is destroyed
3545 
3546        You cannot set new nonzero locations into this matrix, that will generate an error.
3547 
3548        The i and j indices are 0 based
3549 
3550        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3551 
3552       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3553       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3554       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3555       with column-major ordering within blocks.
3556 
3557 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3558 
3559 @*/
3560 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3561 {
3562   PetscErrorCode ierr;
3563   PetscInt       ii;
3564   Mat_SeqBAIJ    *baij;
3565 
3566   PetscFunctionBegin;
3567   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3568   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3569 
3570   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3571   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3572   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3573   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3574   baij = (Mat_SeqBAIJ*)(*mat)->data;
3575   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3576   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3577 
3578   baij->i = i;
3579   baij->j = j;
3580   baij->a = a;
3581 
3582   baij->singlemalloc = PETSC_FALSE;
3583   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3584   baij->free_a       = PETSC_FALSE;
3585   baij->free_ij      = PETSC_FALSE;
3586 
3587   for (ii=0; ii<m; ii++) {
3588     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3589 #if defined(PETSC_USE_DEBUG)
3590     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3591 #endif
3592   }
3593 #if defined(PETSC_USE_DEBUG)
3594   for (ii=0; ii<baij->i[m]; ii++) {
3595     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3596     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3597   }
3598 #endif
3599 
3600   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3601   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3602   PetscFunctionReturn(0);
3603 }
3604 
3605 #undef __FUNCT__
3606 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3608 {
3609   PetscErrorCode ierr;
3610 
3611   PetscFunctionBegin;
3612   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3613   PetscFunctionReturn(0);
3614 }
3615