xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 96e086a24b9cdc04c368e2851f46791dcaa837d7)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*);
17 #endif
18 
19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
20 {
21   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
22   PetscErrorCode ierr;
23   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
24   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
25   PetscReal      shift = 0.0;
26   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
27 
28   PetscFunctionBegin;
29   allowzeropivot = PetscNot(A->erroriffailure);
30 
31   if (a->idiagvalid) {
32     if (values) *values = a->idiag;
33     PetscFunctionReturn(0);
34   }
35   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
36   diag_offset = a->diag;
37   if (!a->idiag) {
38     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
39     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
40   }
41   diag  = a->idiag;
42   mdiag = a->idiag+bs2*mbs;
43   if (values) *values = a->idiag;
44   /* factor and invert each block */
45   switch (bs) {
46   case 1:
47     for (i=0; i<mbs; i++) {
48       odiag    = v + 1*diag_offset[i];
49       diag[0]  = odiag[0];
50       mdiag[0] = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63       mdiag   += 1;
64     }
65     break;
66   case 2:
67     for (i=0; i<mbs; i++) {
68       odiag    = v + 4*diag_offset[i];
69       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
70       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
71       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
72       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
73       diag    += 4;
74       mdiag   += 4;
75     }
76     break;
77   case 3:
78     for (i=0; i<mbs; i++) {
79       odiag    = v + 9*diag_offset[i];
80       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82       diag[8]  = odiag[8];
83       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85       mdiag[8] = odiag[8];
86       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
87       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
88       diag    += 9;
89       mdiag   += 9;
90     }
91     break;
92   case 4:
93     for (i=0; i<mbs; i++) {
94       odiag  = v + 16*diag_offset[i];
95       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
97       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
98       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99       diag  += 16;
100       mdiag += 16;
101     }
102     break;
103   case 5:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 25*diag_offset[i];
106       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
108       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
109       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
110       diag  += 25;
111       mdiag += 25;
112     }
113     break;
114   case 6:
115     for (i=0; i<mbs; i++) {
116       odiag  = v + 36*diag_offset[i];
117       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
118       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
119       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
120       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
121       diag  += 36;
122       mdiag += 36;
123     }
124     break;
125   case 7:
126     for (i=0; i<mbs; i++) {
127       odiag  = v + 49*diag_offset[i];
128       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
129       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
130       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
131       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
132       diag  += 49;
133       mdiag += 49;
134     }
135     break;
136   default:
137     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
138     for (i=0; i<mbs; i++) {
139       odiag  = v + bs2*diag_offset[i];
140       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
141       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
142       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
143       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144       diag  += bs2;
145       mdiag += bs2;
146     }
147     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
148   }
149   a->idiagvalid = PETSC_TRUE;
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
154 {
155   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
156   PetscScalar       *x,*work,*w,*workt,*t;
157   const MatScalar   *v,*aa = a->a, *idiag;
158   const PetscScalar *b,*xb;
159   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
160   PetscErrorCode    ierr;
161   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
162   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
163 
164   PetscFunctionBegin;
165   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
166   its = its*lits;
167   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
168   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
169   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
170   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
171   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
172 
173   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
174 
175   if (!m) PetscFunctionReturn(0);
176   diag  = a->diag;
177   idiag = a->idiag;
178   k    = PetscMax(A->rmap->n,A->cmap->n);
179   if (!a->mult_work) {
180     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
181   }
182   if (!a->sor_workt) {
183     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
184   }
185   if (!a->sor_work) {
186     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
187   }
188   work = a->mult_work;
189   t    = a->sor_workt;
190   w    = a->sor_work;
191 
192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
193   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
194 
195   if (flag & SOR_ZERO_INITIAL_GUESS) {
196     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
197       switch (bs) {
198       case 1:
199         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
200         t[0] = b[0];
201         i2     = 1;
202         idiag += 1;
203         for (i=1; i<m; i++) {
204           v  = aa + ai[i];
205           vi = aj + ai[i];
206           nz = diag[i] - ai[i];
207           s[0] = b[i2];
208           for (j=0; j<nz; j++) {
209             xw[0] = x[vi[j]];
210             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
211           }
212           t[i2] = s[0];
213           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
214           x[i2]  = xw[0];
215           idiag += 1;
216           i2    += 1;
217         }
218         break;
219       case 2:
220         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
221         t[0] = b[0]; t[1] = b[1];
222         i2     = 2;
223         idiag += 4;
224         for (i=1; i<m; i++) {
225           v  = aa + 4*ai[i];
226           vi = aj + ai[i];
227           nz = diag[i] - ai[i];
228           s[0] = b[i2]; s[1] = b[i2+1];
229           for (j=0; j<nz; j++) {
230             idx = 2*vi[j];
231             it  = 4*j;
232             xw[0] = x[idx]; xw[1] = x[1+idx];
233             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
234           }
235           t[i2] = s[0]; t[i2+1] = s[1];
236           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
237           x[i2]   = xw[0]; x[i2+1] = xw[1];
238           idiag  += 4;
239           i2     += 2;
240         }
241         break;
242       case 3:
243         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
244         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
245         i2     = 3;
246         idiag += 9;
247         for (i=1; i<m; i++) {
248           v  = aa + 9*ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
252           while (nz--) {
253             idx = 3*(*vi++);
254             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
255             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
256             v  += 9;
257           }
258           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
259           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
260           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
261           idiag  += 9;
262           i2     += 3;
263         }
264         break;
265       case 4:
266         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
267         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
268         i2     = 4;
269         idiag += 16;
270         for (i=1; i<m; i++) {
271           v  = aa + 16*ai[i];
272           vi = aj + ai[i];
273           nz = diag[i] - ai[i];
274           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
275           while (nz--) {
276             idx = 4*(*vi++);
277             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
278             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
279             v  += 16;
280           }
281           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
282           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
283           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
284           idiag  += 16;
285           i2     += 4;
286         }
287         break;
288       case 5:
289         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
290         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
291         i2     = 5;
292         idiag += 25;
293         for (i=1; i<m; i++) {
294           v  = aa + 25*ai[i];
295           vi = aj + ai[i];
296           nz = diag[i] - ai[i];
297           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
298           while (nz--) {
299             idx = 5*(*vi++);
300             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
301             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
302             v  += 25;
303           }
304           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
305           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
307           idiag  += 25;
308           i2     += 5;
309         }
310         break;
311       case 6:
312         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
314         i2     = 6;
315         idiag += 36;
316         for (i=1; i<m; i++) {
317           v  = aa + 36*ai[i];
318           vi = aj + ai[i];
319           nz = diag[i] - ai[i];
320           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
321           while (nz--) {
322             idx = 6*(*vi++);
323             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
324             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
325             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
326             v  += 36;
327           }
328           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
329           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
330           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
331           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
332           idiag  += 36;
333           i2     += 6;
334         }
335         break;
336       case 7:
337         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
338         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
339         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
340         i2     = 7;
341         idiag += 49;
342         for (i=1; i<m; i++) {
343           v  = aa + 49*ai[i];
344           vi = aj + ai[i];
345           nz = diag[i] - ai[i];
346           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
347           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
348           while (nz--) {
349             idx = 7*(*vi++);
350             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
351             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
352             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
353             v  += 49;
354           }
355           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
357           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
358           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
359           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
360           idiag  += 49;
361           i2     += 7;
362         }
363         break;
364       default:
365         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
366         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
367         i2     = bs;
368         idiag += bs2;
369         for (i=1; i<m; i++) {
370           v  = aa + bs2*ai[i];
371           vi = aj + ai[i];
372           nz = diag[i] - ai[i];
373 
374           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
375           /* copy all rows of x that are needed into contiguous space */
376           workt = work;
377           for (j=0; j<nz; j++) {
378             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
379             workt += bs;
380           }
381           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
382           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
383           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
384 
385           idiag += bs2;
386           i2    += bs;
387         }
388         break;
389       }
390       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
391       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
392       xb = t;
393     }
394     else xb = b;
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396       idiag = a->idiag+bs2*(a->mbs-1);
397       i2 = bs * (m-1);
398       switch (bs) {
399       case 1:
400         s[0]  = xb[i2];
401         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
402         x[i2] = xw[0];
403         i2   -= 1;
404         for (i=m-2; i>=0; i--) {
405           v  = aa + (diag[i]+1);
406           vi = aj + diag[i] + 1;
407           nz = ai[i+1] - diag[i] - 1;
408           s[0] = xb[i2];
409           for (j=0; j<nz; j++) {
410             xw[0] = x[vi[j]];
411             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
412           }
413           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
414           x[i2]  = xw[0];
415           idiag -= 1;
416           i2    -= 1;
417         }
418         break;
419       case 2:
420         s[0]  = xb[i2]; s[1] = xb[i2+1];
421         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
422         x[i2] = xw[0]; x[i2+1] = xw[1];
423         i2    -= 2;
424         idiag -= 4;
425         for (i=m-2; i>=0; i--) {
426           v  = aa + 4*(diag[i] + 1);
427           vi = aj + diag[i] + 1;
428           nz = ai[i+1] - diag[i] - 1;
429           s[0] = xb[i2]; s[1] = xb[i2+1];
430           for (j=0; j<nz; j++) {
431             idx = 2*vi[j];
432             it  = 4*j;
433             xw[0] = x[idx]; xw[1] = x[1+idx];
434             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
435           }
436           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
437           x[i2]   = xw[0]; x[i2+1] = xw[1];
438           idiag  -= 4;
439           i2     -= 2;
440         }
441         break;
442       case 3:
443         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
444         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
445         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
446         i2    -= 3;
447         idiag -= 9;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + 9*(diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
453           while (nz--) {
454             idx = 3*(*vi++);
455             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
456             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
457             v  += 9;
458           }
459           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
460           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
461           idiag  -= 9;
462           i2     -= 3;
463         }
464         break;
465       case 4:
466         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
467         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
468         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
469         i2    -= 4;
470         idiag -= 16;
471         for (i=m-2; i>=0; i--) {
472           v  = aa + 16*(diag[i]+1);
473           vi = aj + diag[i] + 1;
474           nz = ai[i+1] - diag[i] - 1;
475           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
476           while (nz--) {
477             idx = 4*(*vi++);
478             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
479             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
480             v  += 16;
481           }
482           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
483           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
484           idiag  -= 16;
485           i2     -= 4;
486         }
487         break;
488       case 5:
489         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
490         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
491         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
492         i2    -= 5;
493         idiag -= 25;
494         for (i=m-2; i>=0; i--) {
495           v  = aa + 25*(diag[i]+1);
496           vi = aj + diag[i] + 1;
497           nz = ai[i+1] - diag[i] - 1;
498           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
499           while (nz--) {
500             idx = 5*(*vi++);
501             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
502             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
503             v  += 25;
504           }
505           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
506           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
507           idiag  -= 25;
508           i2     -= 5;
509         }
510         break;
511       case 6:
512         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
513         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
514         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
515         i2    -= 6;
516         idiag -= 36;
517         for (i=m-2; i>=0; i--) {
518           v  = aa + 36*(diag[i]+1);
519           vi = aj + diag[i] + 1;
520           nz = ai[i+1] - diag[i] - 1;
521           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
522           while (nz--) {
523             idx = 6*(*vi++);
524             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
525             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
526             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
527             v  += 36;
528           }
529           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
530           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
531           idiag  -= 36;
532           i2     -= 6;
533         }
534         break;
535       case 7:
536         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
537         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
538         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
539         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541         i2    -= 7;
542         idiag -= 49;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + 49*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
548           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
549           while (nz--) {
550             idx = 7*(*vi++);
551             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
552             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
553             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
554             v  += 49;
555           }
556           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
557           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
558           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
559           idiag  -= 49;
560           i2     -= 7;
561         }
562         break;
563       default:
564         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
565         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
566         i2    -= bs;
567         idiag -= bs2;
568         for (i=m-2; i>=0; i--) {
569           v  = aa + bs2*(diag[i]+1);
570           vi = aj + diag[i] + 1;
571           nz = ai[i+1] - diag[i] - 1;
572 
573           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
574           /* copy all rows of x that are needed into contiguous space */
575           workt = work;
576           for (j=0; j<nz; j++) {
577             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
578             workt += bs;
579           }
580           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
581           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
582 
583           idiag -= bs2;
584           i2    -= bs;
585         }
586         break;
587       }
588       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
589     }
590     its--;
591   }
592   while (its--) {
593     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
594       idiag = a->idiag;
595       i2 = 0;
596       switch (bs) {
597       case 1:
598         for (i=0; i<m; i++) {
599           v  = aa + ai[i];
600           vi = aj + ai[i];
601           nz = ai[i+1] - ai[i];
602           s[0] = b[i2];
603           for (j=0; j<nz; j++) {
604             xw[0] = x[vi[j]];
605             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
606           }
607           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
608           x[i2] += xw[0];
609           idiag += 1;
610           i2    += 1;
611         }
612         break;
613       case 2:
614         for (i=0; i<m; i++) {
615           v  = aa + 4*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1];
619           for (j=0; j<nz; j++) {
620             idx = 2*vi[j];
621             it  = 4*j;
622             xw[0] = x[idx]; xw[1] = x[1+idx];
623             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
624           }
625           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
626           x[i2]  += xw[0]; x[i2+1] += xw[1];
627           idiag  += 4;
628           i2     += 2;
629         }
630         break;
631       case 3:
632         for (i=0; i<m; i++) {
633           v  = aa + 9*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
637           while (nz--) {
638             idx = 3*(*vi++);
639             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
641             v  += 9;
642           }
643           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
645           idiag  += 9;
646           i2     += 3;
647         }
648         break;
649       case 4:
650         for (i=0; i<m; i++) {
651           v  = aa + 16*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
655           while (nz--) {
656             idx = 4*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
659             v  += 16;
660           }
661           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
663           idiag  += 16;
664           i2     += 4;
665         }
666         break;
667       case 5:
668         for (i=0; i<m; i++) {
669           v  = aa + 25*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
673           while (nz--) {
674             idx = 5*(*vi++);
675             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
677             v  += 25;
678           }
679           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
681           idiag  += 25;
682           i2     += 5;
683         }
684         break;
685       case 6:
686         for (i=0; i<m; i++) {
687           v  = aa + 36*ai[i];
688           vi = aj + ai[i];
689           nz = ai[i+1] - ai[i];
690           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
691           while (nz--) {
692             idx = 6*(*vi++);
693             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
694             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
695             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
696             v  += 36;
697           }
698           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
699           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
700           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
701           idiag  += 36;
702           i2     += 6;
703         }
704         break;
705       case 7:
706         for (i=0; i<m; i++) {
707           v  = aa + 49*ai[i];
708           vi = aj + ai[i];
709           nz = ai[i+1] - ai[i];
710           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
711           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
712           while (nz--) {
713             idx = 7*(*vi++);
714             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
715             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
716             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
717             v  += 49;
718           }
719           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
720           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
721           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
722           idiag  += 49;
723           i2     += 7;
724         }
725         break;
726       default:
727         for (i=0; i<m; i++) {
728           v  = aa + bs2*ai[i];
729           vi = aj + ai[i];
730           nz = ai[i+1] - ai[i];
731 
732           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
733           /* copy all rows of x that are needed into contiguous space */
734           workt = work;
735           for (j=0; j<nz; j++) {
736             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
737             workt += bs;
738           }
739           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
740           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
741 
742           idiag += bs2;
743           i2    += bs;
744         }
745         break;
746       }
747       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
748     }
749     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
750       idiag = a->idiag+bs2*(a->mbs-1);
751       i2 = bs * (m-1);
752       switch (bs) {
753       case 1:
754         for (i=m-1; i>=0; i--) {
755           v  = aa + ai[i];
756           vi = aj + ai[i];
757           nz = ai[i+1] - ai[i];
758           s[0] = b[i2];
759           for (j=0; j<nz; j++) {
760             xw[0] = x[vi[j]];
761             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
762           }
763           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
764           x[i2] += xw[0];
765           idiag -= 1;
766           i2    -= 1;
767         }
768         break;
769       case 2:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 4*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1];
775           for (j=0; j<nz; j++) {
776             idx = 2*vi[j];
777             it  = 4*j;
778             xw[0] = x[idx]; xw[1] = x[1+idx];
779             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
780           }
781           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
782           x[i2]  += xw[0]; x[i2+1] += xw[1];
783           idiag  -= 4;
784           i2     -= 2;
785         }
786         break;
787       case 3:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 9*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
793           while (nz--) {
794             idx = 3*(*vi++);
795             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
797             v  += 9;
798           }
799           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
801           idiag  -= 9;
802           i2     -= 3;
803         }
804         break;
805       case 4:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 16*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
811           while (nz--) {
812             idx = 4*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
815             v  += 16;
816           }
817           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
819           idiag  -= 16;
820           i2     -= 4;
821         }
822         break;
823       case 5:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 25*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
829           while (nz--) {
830             idx = 5*(*vi++);
831             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
833             v  += 25;
834           }
835           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
837           idiag  -= 25;
838           i2     -= 5;
839         }
840         break;
841       case 6:
842         for (i=m-1; i>=0; i--) {
843           v  = aa + 36*ai[i];
844           vi = aj + ai[i];
845           nz = ai[i+1] - ai[i];
846           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
847           while (nz--) {
848             idx = 6*(*vi++);
849             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
850             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
851             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
852             v  += 36;
853           }
854           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
855           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
856           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
857           idiag  -= 36;
858           i2     -= 6;
859         }
860         break;
861       case 7:
862         for (i=m-1; i>=0; i--) {
863           v  = aa + 49*ai[i];
864           vi = aj + ai[i];
865           nz = ai[i+1] - ai[i];
866           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
867           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
868           while (nz--) {
869             idx = 7*(*vi++);
870             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
871             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
872             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
873             v  += 49;
874           }
875           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
876           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
877           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
878           idiag  -= 49;
879           i2     -= 7;
880         }
881         break;
882       default:
883         for (i=m-1; i>=0; i--) {
884           v  = aa + bs2*ai[i];
885           vi = aj + ai[i];
886           nz = ai[i+1] - ai[i];
887 
888           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
889           /* copy all rows of x that are needed into contiguous space */
890           workt = work;
891           for (j=0; j<nz; j++) {
892             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
893             workt += bs;
894           }
895           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
896           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
897 
898           idiag -= bs2;
899           i2    -= bs;
900         }
901         break;
902       }
903       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
904     }
905   }
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 
912 /*
913     Special version for direct calls from Fortran (Used in PETSc-fun3d)
914 */
915 #if defined(PETSC_HAVE_FORTRAN_CAPS)
916 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
917 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
918 #define matsetvaluesblocked4_ matsetvaluesblocked4
919 #endif
920 
921 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
922 {
923   Mat               A  = *AA;
924   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
925   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
926   PetscInt          *ai    =a->i,*ailen=a->ilen;
927   PetscInt          *aj    =a->j,stepval,lastcol = -1;
928   const PetscScalar *value = v;
929   MatScalar         *ap,*aa = a->a,*bap;
930 
931   PetscFunctionBegin;
932   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
933   stepval = (n-1)*4;
934   for (k=0; k<m; k++) { /* loop over added rows */
935     row  = im[k];
936     rp   = aj + ai[row];
937     ap   = aa + 16*ai[row];
938     nrow = ailen[row];
939     low  = 0;
940     high = nrow;
941     for (l=0; l<n; l++) { /* loop over added columns */
942       col = in[l];
943       if (col <= lastcol)  low = 0;
944       else                high = nrow;
945       lastcol = col;
946       value   = v + k*(stepval+4 + l)*4;
947       while (high-low > 7) {
948         t = (low+high)/2;
949         if (rp[t] > col) high = t;
950         else             low  = t;
951       }
952       for (i=low; i<high; i++) {
953         if (rp[i] > col) break;
954         if (rp[i] == col) {
955           bap = ap +  16*i;
956           for (ii=0; ii<4; ii++,value+=stepval) {
957             for (jj=ii; jj<16; jj+=4) {
958               bap[jj] += *value++;
959             }
960           }
961           goto noinsert2;
962         }
963       }
964       N = nrow++ - 1;
965       high++; /* added new column index thus must search to one higher than before */
966       /* shift up all the later entries in this row */
967       for (ii=N; ii>=i; ii--) {
968         rp[ii+1] = rp[ii];
969         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
970       }
971       if (N >= i) {
972         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
973       }
974       rp[i] = col;
975       bap   = ap +  16*i;
976       for (ii=0; ii<4; ii++,value+=stepval) {
977         for (jj=ii; jj<16; jj+=4) {
978           bap[jj] = *value++;
979         }
980       }
981       noinsert2:;
982       low = i;
983     }
984     ailen[row] = nrow;
985   }
986   PetscFunctionReturnVoid();
987 }
988 
989 #if defined(PETSC_HAVE_FORTRAN_CAPS)
990 #define matsetvalues4_ MATSETVALUES4
991 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
992 #define matsetvalues4_ matsetvalues4
993 #endif
994 
995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
996 {
997   Mat         A  = *AA;
998   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
999   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1000   PetscInt    *ai=a->i,*ailen=a->ilen;
1001   PetscInt    *aj=a->j,brow,bcol;
1002   PetscInt    ridx,cidx,lastcol = -1;
1003   MatScalar   *ap,value,*aa=a->a,*bap;
1004 
1005   PetscFunctionBegin;
1006   for (k=0; k<m; k++) { /* loop over added rows */
1007     row  = im[k]; brow = row/4;
1008     rp   = aj + ai[brow];
1009     ap   = aa + 16*ai[brow];
1010     nrow = ailen[brow];
1011     low  = 0;
1012     high = nrow;
1013     for (l=0; l<n; l++) { /* loop over added columns */
1014       col   = in[l]; bcol = col/4;
1015       ridx  = row % 4; cidx = col % 4;
1016       value = v[l + k*n];
1017       if (col <= lastcol)  low = 0;
1018       else                high = nrow;
1019       lastcol = col;
1020       while (high-low > 7) {
1021         t = (low+high)/2;
1022         if (rp[t] > bcol) high = t;
1023         else              low  = t;
1024       }
1025       for (i=low; i<high; i++) {
1026         if (rp[i] > bcol) break;
1027         if (rp[i] == bcol) {
1028           bap   = ap +  16*i + 4*cidx + ridx;
1029           *bap += value;
1030           goto noinsert1;
1031         }
1032       }
1033       N = nrow++ - 1;
1034       high++; /* added new column thus must search to one higher than before */
1035       /* shift up all the later entries in this row */
1036       for (ii=N; ii>=i; ii--) {
1037         rp[ii+1] = rp[ii];
1038         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1039       }
1040       if (N>=i) {
1041         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1042       }
1043       rp[i]                    = bcol;
1044       ap[16*i + 4*cidx + ridx] = value;
1045 noinsert1:;
1046       low = i;
1047     }
1048     ailen[brow] = nrow;
1049   }
1050   PetscFunctionReturnVoid();
1051 }
1052 
1053 /*
1054      Checks for missing diagonals
1055 */
1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057 {
1058   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1059   PetscErrorCode ierr;
1060   PetscInt       *diag,*ii = a->i,i;
1061 
1062   PetscFunctionBegin;
1063   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1064   *missing = PETSC_FALSE;
1065   if (A->rmap->n > 0 && !ii) {
1066     *missing = PETSC_TRUE;
1067     if (d) *d = 0;
1068     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069   } else {
1070     diag = a->diag;
1071     for (i=0; i<a->mbs; i++) {
1072       if (diag[i] >= ii[i+1]) {
1073         *missing = PETSC_TRUE;
1074         if (d) *d = i;
1075         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076         break;
1077       }
1078     }
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1084 {
1085   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1086   PetscErrorCode ierr;
1087   PetscInt       i,j,m = a->mbs;
1088 
1089   PetscFunctionBegin;
1090   if (!a->diag) {
1091     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1092     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1093     a->free_diag = PETSC_TRUE;
1094   }
1095   for (i=0; i<m; i++) {
1096     a->diag[i] = a->i[i+1];
1097     for (j=a->i[i]; j<a->i[i+1]; j++) {
1098       if (a->j[j] == i) {
1099         a->diag[i] = j;
1100         break;
1101       }
1102     }
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 
1108 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1109 {
1110   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1111   PetscErrorCode ierr;
1112   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1113   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1114 
1115   PetscFunctionBegin;
1116   *nn = n;
1117   if (!ia) PetscFunctionReturn(0);
1118   if (symmetric) {
1119     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1120     nz   = tia[n];
1121   } else {
1122     tia = a->i; tja = a->j;
1123   }
1124 
1125   if (!blockcompressed && bs > 1) {
1126     (*nn) *= bs;
1127     /* malloc & create the natural set of indices */
1128     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1129     if (n) {
1130       (*ia)[0] = oshift;
1131       for (j=1; j<bs; j++) {
1132         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1133       }
1134     }
1135 
1136     for (i=1; i<n; i++) {
1137       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1138       for (j=1; j<bs; j++) {
1139         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1140       }
1141     }
1142     if (n) {
1143       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1144     }
1145 
1146     if (inja) {
1147       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1148       cnt = 0;
1149       for (i=0; i<n; i++) {
1150         for (j=0; j<bs; j++) {
1151           for (k=tia[i]; k<tia[i+1]; k++) {
1152             for (l=0; l<bs; l++) {
1153               (*ja)[cnt++] = bs*tja[k] + l;
1154             }
1155           }
1156         }
1157       }
1158     }
1159 
1160     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1161       ierr = PetscFree(tia);CHKERRQ(ierr);
1162       ierr = PetscFree(tja);CHKERRQ(ierr);
1163     }
1164   } else if (oshift == 1) {
1165     if (symmetric) {
1166       nz = tia[A->rmap->n/bs];
1167       /*  add 1 to i and j indices */
1168       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1169       *ia = tia;
1170       if (ja) {
1171         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1172         *ja = tja;
1173       }
1174     } else {
1175       nz = a->i[A->rmap->n/bs];
1176       /* malloc space and  add 1 to i and j indices */
1177       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1178       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1179       if (ja) {
1180         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1181         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1182       }
1183     }
1184   } else {
1185     *ia = tia;
1186     if (ja) *ja = tja;
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1192 {
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196   if (!ia) PetscFunctionReturn(0);
1197   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1198     ierr = PetscFree(*ia);CHKERRQ(ierr);
1199     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1200   }
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1205 {
1206   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210 #if defined(PETSC_USE_LOG)
1211   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1212 #endif
1213   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1214   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1215   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1216   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1217   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1218   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1219   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1220   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1221   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1222   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1223   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1224   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1225   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1226 
1227   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1228   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1229   ierr = PetscFree(A->data);CHKERRQ(ierr);
1230 
1231   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1242 #if defined(PETSC_HAVE_HYPRE)
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1244 #endif
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1249 {
1250   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   switch (op) {
1255   case MAT_ROW_ORIENTED:
1256     a->roworiented = flg;
1257     break;
1258   case MAT_KEEP_NONZERO_PATTERN:
1259     a->keepnonzeropattern = flg;
1260     break;
1261   case MAT_NEW_NONZERO_LOCATIONS:
1262     a->nonew = (flg ? 0 : 1);
1263     break;
1264   case MAT_NEW_NONZERO_LOCATION_ERR:
1265     a->nonew = (flg ? -1 : 0);
1266     break;
1267   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1268     a->nonew = (flg ? -2 : 0);
1269     break;
1270   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1271     a->nounused = (flg ? -1 : 0);
1272     break;
1273   case MAT_NEW_DIAGONALS:
1274   case MAT_IGNORE_OFF_PROC_ENTRIES:
1275   case MAT_USE_HASH_TABLE:
1276     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1277     break;
1278   case MAT_SPD:
1279   case MAT_SYMMETRIC:
1280   case MAT_STRUCTURALLY_SYMMETRIC:
1281   case MAT_HERMITIAN:
1282   case MAT_SYMMETRY_ETERNAL:
1283   case MAT_SUBMAT_SINGLEIS:
1284   case MAT_STRUCTURE_ONLY:
1285     /* These options are handled directly by MatSetOption() */
1286     break;
1287   default:
1288     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1294 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1295 {
1296   PetscErrorCode ierr;
1297   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1298   MatScalar      *aa_i;
1299   PetscScalar    *v_i;
1300 
1301   PetscFunctionBegin;
1302   bs  = A->rmap->bs;
1303   bs2 = bs*bs;
1304   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1305 
1306   bn  = row/bs;   /* Block number */
1307   bp  = row % bs; /* Block Position */
1308   M   = ai[bn+1] - ai[bn];
1309   *nz = bs*M;
1310 
1311   if (v) {
1312     *v = 0;
1313     if (*nz) {
1314       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1315       for (i=0; i<M; i++) { /* for each block in the block row */
1316         v_i  = *v + i*bs;
1317         aa_i = aa + bs2*(ai[bn] + i);
1318         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1319       }
1320     }
1321   }
1322 
1323   if (idx) {
1324     *idx = 0;
1325     if (*nz) {
1326       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1327       for (i=0; i<M; i++) { /* for each block in the block row */
1328         idx_i = *idx + i*bs;
1329         itmp  = bs*aj[ai[bn] + i];
1330         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1331       }
1332     }
1333   }
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1338 {
1339   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1348 {
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1353   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1358 {
1359   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1360   Mat            C;
1361   PetscErrorCode ierr;
1362   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1363   PetscInt       *rows,*cols,bs2=a->bs2;
1364   MatScalar      *array;
1365 
1366   PetscFunctionBegin;
1367   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1368   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1369     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1370 
1371     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1372     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1373     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1374     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1375     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1376     ierr = PetscFree(col);CHKERRQ(ierr);
1377   } else {
1378     C = *B;
1379   }
1380 
1381   array = a->a;
1382   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1383   for (i=0; i<mbs; i++) {
1384     cols[0] = i*bs;
1385     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1386     len = ai[i+1] - ai[i];
1387     for (j=0; j<len; j++) {
1388       rows[0] = (*aj++)*bs;
1389       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1390       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1391       array += bs2;
1392     }
1393   }
1394   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1395 
1396   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1398 
1399   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1400     *B = C;
1401   } else {
1402     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1408 {
1409   PetscErrorCode ierr;
1410   Mat            Btrans;
1411 
1412   PetscFunctionBegin;
1413   *f   = PETSC_FALSE;
1414   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1415   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1416   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1421 {
1422   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1423   PetscErrorCode ierr;
1424   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1425   int            fd;
1426   PetscScalar    *aa;
1427   FILE           *file;
1428 
1429   PetscFunctionBegin;
1430   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1431   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1432   col_lens[0] = MAT_FILE_CLASSID;
1433 
1434   col_lens[1] = A->rmap->N;
1435   col_lens[2] = A->cmap->n;
1436   col_lens[3] = a->nz*bs2;
1437 
1438   /* store lengths of each row and write (including header) to file */
1439   count = 0;
1440   for (i=0; i<a->mbs; i++) {
1441     for (j=0; j<bs; j++) {
1442       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1443     }
1444   }
1445   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1446   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1447 
1448   /* store column indices (zero start index) */
1449   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1450   count = 0;
1451   for (i=0; i<a->mbs; i++) {
1452     for (j=0; j<bs; j++) {
1453       for (k=a->i[i]; k<a->i[i+1]; k++) {
1454         for (l=0; l<bs; l++) {
1455           jj[count++] = bs*a->j[k] + l;
1456         }
1457       }
1458     }
1459   }
1460   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1461   ierr = PetscFree(jj);CHKERRQ(ierr);
1462 
1463   /* store nonzero values */
1464   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1465   count = 0;
1466   for (i=0; i<a->mbs; i++) {
1467     for (j=0; j<bs; j++) {
1468       for (k=a->i[i]; k<a->i[i+1]; k++) {
1469         for (l=0; l<bs; l++) {
1470           aa[count++] = a->a[bs2*k + l*bs + j];
1471         }
1472       }
1473     }
1474   }
1475   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1476   ierr = PetscFree(aa);CHKERRQ(ierr);
1477 
1478   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1479   if (file) {
1480     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1486 {
1487   PetscErrorCode ierr;
1488   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1489   PetscInt       i,bs = A->rmap->bs,k;
1490 
1491   PetscFunctionBegin;
1492   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1493   for (i=0; i<a->mbs; i++) {
1494     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1495     for (k=a->i[i]; k<a->i[i+1]; k++) {
1496       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1497     }
1498     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1499   }
1500   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1505 {
1506   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1507   PetscErrorCode    ierr;
1508   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1509   PetscViewerFormat format;
1510 
1511   PetscFunctionBegin;
1512   if (A->structure_only) {
1513     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1514     PetscFunctionReturn(0);
1515   }
1516 
1517   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1518   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1519     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1521     const char *matname;
1522     Mat        aij;
1523     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1524     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1525     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1526     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1527     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1528   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1529       PetscFunctionReturn(0);
1530   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1531     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1532     for (i=0; i<a->mbs; i++) {
1533       for (j=0; j<bs; j++) {
1534         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1535         for (k=a->i[i]; k<a->i[i+1]; k++) {
1536           for (l=0; l<bs; l++) {
1537 #if defined(PETSC_USE_COMPLEX)
1538             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1543                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1544             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1546             }
1547 #else
1548             if (a->a[bs2*k + l*bs + j] != 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1550             }
1551 #endif
1552           }
1553         }
1554         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1555       }
1556     }
1557     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1558   } else {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1571                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1577 #endif
1578           }
1579         }
1580         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1581       }
1582     }
1583     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1584   }
1585   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #include <petscdraw.h>
1590 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1591 {
1592   Mat               A = (Mat) Aa;
1593   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1594   PetscErrorCode    ierr;
1595   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1596   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1597   MatScalar         *aa;
1598   PetscViewer       viewer;
1599   PetscViewerFormat format;
1600 
1601   PetscFunctionBegin;
1602   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1603   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1604   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1605 
1606   /* loop over matrix elements drawing boxes */
1607 
1608   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1609     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1610     /* Blue for negative, Cyan for zero and  Red for positive */
1611     color = PETSC_DRAW_BLUE;
1612     for (i=0,row=0; i<mbs; i++,row+=bs) {
1613       for (j=a->i[i]; j<a->i[i+1]; j++) {
1614         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1615         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1616         aa  = a->a + j*bs2;
1617         for (k=0; k<bs; k++) {
1618           for (l=0; l<bs; l++) {
1619             if (PetscRealPart(*aa++) >=  0.) continue;
1620             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1621           }
1622         }
1623       }
1624     }
1625     color = PETSC_DRAW_CYAN;
1626     for (i=0,row=0; i<mbs; i++,row+=bs) {
1627       for (j=a->i[i]; j<a->i[i+1]; j++) {
1628         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1629         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1630         aa  = a->a + j*bs2;
1631         for (k=0; k<bs; k++) {
1632           for (l=0; l<bs; l++) {
1633             if (PetscRealPart(*aa++) != 0.) continue;
1634             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1635           }
1636         }
1637       }
1638     }
1639     color = PETSC_DRAW_RED;
1640     for (i=0,row=0; i<mbs; i++,row+=bs) {
1641       for (j=a->i[i]; j<a->i[i+1]; j++) {
1642         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644         aa  = a->a + j*bs2;
1645         for (k=0; k<bs; k++) {
1646           for (l=0; l<bs; l++) {
1647             if (PetscRealPart(*aa++) <= 0.) continue;
1648             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1649           }
1650         }
1651       }
1652     }
1653     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1654   } else {
1655     /* use contour shading to indicate magnitude of values */
1656     /* first determine max of all nonzero values */
1657     PetscReal minv = 0.0, maxv = 0.0;
1658     PetscDraw popup;
1659 
1660     for (i=0; i<a->nz*a->bs2; i++) {
1661       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1662     }
1663     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1664     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1665     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1666 
1667     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1668     for (i=0,row=0; i<mbs; i++,row+=bs) {
1669       for (j=a->i[i]; j<a->i[i+1]; j++) {
1670         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672         aa  = a->a + j*bs2;
1673         for (k=0; k<bs; k++) {
1674           for (l=0; l<bs; l++) {
1675             MatScalar v = *aa++;
1676             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1677             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1678           }
1679         }
1680       }
1681     }
1682     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1688 {
1689   PetscErrorCode ierr;
1690   PetscReal      xl,yl,xr,yr,w,h;
1691   PetscDraw      draw;
1692   PetscBool      isnull;
1693 
1694   PetscFunctionBegin;
1695   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1696   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1697   if (isnull) PetscFunctionReturn(0);
1698 
1699   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700   xr  += w;          yr += h;        xl = -w;     yl = -h;
1701   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1703   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1705   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710 {
1711   PetscErrorCode ierr;
1712   PetscBool      iascii,isbinary,isdraw;
1713 
1714   PetscFunctionBegin;
1715   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1716   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1717   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1718   if (iascii) {
1719     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1720   } else if (isbinary) {
1721     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1722   } else if (isdraw) {
1723     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1724   } else {
1725     Mat B;
1726     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1727     ierr = MatView(B,viewer);CHKERRQ(ierr);
1728     ierr = MatDestroy(&B);CHKERRQ(ierr);
1729   }
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 
1734 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1735 {
1736   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1737   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1738   PetscInt    *ai = a->i,*ailen = a->ilen;
1739   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1740   MatScalar   *ap,*aa = a->a;
1741 
1742   PetscFunctionBegin;
1743   for (k=0; k<m; k++) { /* loop over rows */
1744     row = im[k]; brow = row/bs;
1745     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1746     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1747     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1748     nrow = ailen[brow];
1749     for (l=0; l<n; l++) { /* loop over columns */
1750       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1751       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1752       col  = in[l];
1753       bcol = col/bs;
1754       cidx = col%bs;
1755       ridx = row%bs;
1756       high = nrow;
1757       low  = 0; /* assume unsorted */
1758       while (high-low > 5) {
1759         t = (low+high)/2;
1760         if (rp[t] > bcol) high = t;
1761         else             low  = t;
1762       }
1763       for (i=low; i<high; i++) {
1764         if (rp[i] > bcol) break;
1765         if (rp[i] == bcol) {
1766           *v++ = ap[bs2*i+bs*cidx+ridx];
1767           goto finished;
1768         }
1769       }
1770       *v++ = 0.0;
1771 finished:;
1772     }
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1778 {
1779   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1780   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1781   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1782   PetscErrorCode    ierr;
1783   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1784   PetscBool         roworiented=a->roworiented;
1785   const PetscScalar *value     = v;
1786   MatScalar         *ap=NULL,*aa = a->a,*bap;
1787 
1788   PetscFunctionBegin;
1789   if (roworiented) {
1790     stepval = (n-1)*bs;
1791   } else {
1792     stepval = (m-1)*bs;
1793   }
1794   for (k=0; k<m; k++) { /* loop over added rows */
1795     row = im[k];
1796     if (row < 0) continue;
1797 #if defined(PETSC_USE_DEBUG)
1798     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1799 #endif
1800     rp   = aj + ai[row];
1801     if (!A->structure_only) ap = aa + bs2*ai[row];
1802     rmax = imax[row];
1803     nrow = ailen[row];
1804     low  = 0;
1805     high = nrow;
1806     for (l=0; l<n; l++) { /* loop over added columns */
1807       if (in[l] < 0) continue;
1808 #if defined(PETSC_USE_DEBUG)
1809       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1810 #endif
1811       col = in[l];
1812       if (!A->structure_only) {
1813         if (roworiented) {
1814           value = v + (k*(stepval+bs) + l)*bs;
1815         } else {
1816           value = v + (l*(stepval+bs) + k)*bs;
1817         }
1818       }
1819       if (col <= lastcol) low = 0;
1820       else high = nrow;
1821       lastcol = col;
1822       while (high-low > 7) {
1823         t = (low+high)/2;
1824         if (rp[t] > col) high = t;
1825         else             low  = t;
1826       }
1827       for (i=low; i<high; i++) {
1828         if (rp[i] > col) break;
1829         if (rp[i] == col) {
1830           if (A->structure_only) goto noinsert2;
1831           bap = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   bap[jj] += value[jj];
1851                 }
1852                 bap += bs;
1853               }
1854             } else {
1855               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1856                 for (jj=0; jj<bs; jj++) {
1857                   bap[jj]  = value[jj];
1858                 }
1859                 bap += bs;
1860               }
1861             }
1862           }
1863           goto noinsert2;
1864         }
1865       }
1866       if (nonew == 1) goto noinsert2;
1867       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1868       if (A->structure_only) {
1869         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1870       } else {
1871         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1872       }
1873       N = nrow++ - 1; high++;
1874       /* shift up all the later entries in this row */
1875       for (ii=N; ii>=i; ii--) {
1876         rp[ii+1] = rp[ii];
1877         if (!A->structure_only) {
1878           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1879         }
1880       }
1881       if (N >= i && !A->structure_only) {
1882         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1883       }
1884 
1885       rp[i] = col;
1886       if (!A->structure_only) {
1887         bap   = ap +  bs2*i;
1888         if (roworiented) {
1889           for (ii=0; ii<bs; ii++,value+=stepval) {
1890             for (jj=ii; jj<bs2; jj+=bs) {
1891               bap[jj] = *value++;
1892             }
1893           }
1894         } else {
1895           for (ii=0; ii<bs; ii++,value+=stepval) {
1896             for (jj=0; jj<bs; jj++) {
1897               *bap++ = *value++;
1898             }
1899           }
1900         }
1901       }
1902 noinsert2:;
1903       low = i;
1904     }
1905     ailen[row] = nrow;
1906   }
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1911 {
1912   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1913   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1914   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1915   PetscErrorCode ierr;
1916   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1917   MatScalar      *aa  = a->a,*ap;
1918   PetscReal      ratio=0.6;
1919 
1920   PetscFunctionBegin;
1921   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1922 
1923   if (m) rmax = ailen[0];
1924   for (i=1; i<mbs; i++) {
1925     /* move each row back by the amount of empty slots (fshift) before it*/
1926     fshift += imax[i-1] - ailen[i-1];
1927     rmax    = PetscMax(rmax,ailen[i]);
1928     if (fshift) {
1929       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1930       N  = ailen[i];
1931       for (j=0; j<N; j++) {
1932         ip[j-fshift] = ip[j];
1933         if (!A->structure_only) {
1934           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1935         }
1936       }
1937     }
1938     ai[i] = ai[i-1] + ailen[i-1];
1939   }
1940   if (mbs) {
1941     fshift += imax[mbs-1] - ailen[mbs-1];
1942     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1943   }
1944 
1945   /* reset ilen and imax for each row */
1946   a->nonzerorowcnt = 0;
1947   if (A->structure_only) {
1948     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1949   } else { /* !A->structure_only */
1950     for (i=0; i<mbs; i++) {
1951       ailen[i] = imax[i] = ai[i+1] - ai[i];
1952       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1953     }
1954   }
1955   a->nz = ai[mbs];
1956 
1957   /* diagonals may have moved, so kill the diagonal pointers */
1958   a->idiagvalid = PETSC_FALSE;
1959   if (fshift && a->diag) {
1960     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1961     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1962     a->diag = 0;
1963   }
1964   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1965   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1966   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1967   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1968 
1969   A->info.mallocs    += a->reallocs;
1970   a->reallocs         = 0;
1971   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1972   a->rmax             = rmax;
1973 
1974   if (!A->structure_only) {
1975     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1976   }
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 /*
1981    This function returns an array of flags which indicate the locations of contiguous
1982    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1983    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1984    Assume: sizes should be long enough to hold all the values.
1985 */
1986 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1987 {
1988   PetscInt  i,j,k,row;
1989   PetscBool flg;
1990 
1991   PetscFunctionBegin;
1992   for (i=0,j=0; i<n; j++) {
1993     row = idx[i];
1994     if (row%bs!=0) { /* Not the begining of a block */
1995       sizes[j] = 1;
1996       i++;
1997     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1998       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1999       i++;
2000     } else { /* Begining of the block, so check if the complete block exists */
2001       flg = PETSC_TRUE;
2002       for (k=1; k<bs; k++) {
2003         if (row+k != idx[i+k]) { /* break in the block */
2004           flg = PETSC_FALSE;
2005           break;
2006         }
2007       }
2008       if (flg) { /* No break in the bs */
2009         sizes[j] = bs;
2010         i       += bs;
2011       } else {
2012         sizes[j] = 1;
2013         i++;
2014       }
2015     }
2016   }
2017   *bs_max = j;
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2022 {
2023   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2024   PetscErrorCode    ierr;
2025   PetscInt          i,j,k,count,*rows;
2026   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2027   PetscScalar       zero = 0.0;
2028   MatScalar         *aa;
2029   const PetscScalar *xx;
2030   PetscScalar       *bb;
2031 
2032   PetscFunctionBegin;
2033   /* fix right hand side if needed */
2034   if (x && b) {
2035     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2036     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2037     for (i=0; i<is_n; i++) {
2038       bb[is_idx[i]] = diag*xx[is_idx[i]];
2039     }
2040     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2041     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2042   }
2043 
2044   /* Make a copy of the IS and  sort it */
2045   /* allocate memory for rows,sizes */
2046   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2047 
2048   /* copy IS values to rows, and sort them */
2049   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2050   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2051 
2052   if (baij->keepnonzeropattern) {
2053     for (i=0; i<is_n; i++) sizes[i] = 1;
2054     bs_max          = is_n;
2055   } else {
2056     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2057     A->nonzerostate++;
2058   }
2059 
2060   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2061     row = rows[j];
2062     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2063     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2064     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2065     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2066       if (diag != (PetscScalar)0.0) {
2067         if (baij->ilen[row/bs] > 0) {
2068           baij->ilen[row/bs]       = 1;
2069           baij->j[baij->i[row/bs]] = row/bs;
2070 
2071           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2072         }
2073         /* Now insert all the diagonal values for this bs */
2074         for (k=0; k<bs; k++) {
2075           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2076         }
2077       } else { /* (diag == 0.0) */
2078         baij->ilen[row/bs] = 0;
2079       } /* end (diag == 0.0) */
2080     } else { /* (sizes[i] != bs) */
2081 #if defined(PETSC_USE_DEBUG)
2082       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2083 #endif
2084       for (k=0; k<count; k++) {
2085         aa[0] =  zero;
2086         aa   += bs;
2087       }
2088       if (diag != (PetscScalar)0.0) {
2089         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2090       }
2091     }
2092   }
2093 
2094   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2100 {
2101   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2102   PetscErrorCode    ierr;
2103   PetscInt          i,j,k,count;
2104   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2105   PetscScalar       zero = 0.0;
2106   MatScalar         *aa;
2107   const PetscScalar *xx;
2108   PetscScalar       *bb;
2109   PetscBool         *zeroed,vecs = PETSC_FALSE;
2110 
2111   PetscFunctionBegin;
2112   /* fix right hand side if needed */
2113   if (x && b) {
2114     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2115     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2116     vecs = PETSC_TRUE;
2117   }
2118 
2119   /* zero the columns */
2120   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2121   for (i=0; i<is_n; i++) {
2122     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2123     zeroed[is_idx[i]] = PETSC_TRUE;
2124   }
2125   for (i=0; i<A->rmap->N; i++) {
2126     if (!zeroed[i]) {
2127       row = i/bs;
2128       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2129         for (k=0; k<bs; k++) {
2130           col = bs*baij->j[j] + k;
2131           if (zeroed[col]) {
2132             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2133             if (vecs) bb[i] -= aa[0]*xx[col];
2134             aa[0] = 0.0;
2135           }
2136         }
2137       }
2138     } else if (vecs) bb[i] = diag*xx[i];
2139   }
2140   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2141   if (vecs) {
2142     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2143     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2144   }
2145 
2146   /* zero the rows */
2147   for (i=0; i<is_n; i++) {
2148     row   = is_idx[i];
2149     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2150     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2151     for (k=0; k<count; k++) {
2152       aa[0] =  zero;
2153       aa   += bs;
2154     }
2155     if (diag != (PetscScalar)0.0) {
2156       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2157     }
2158   }
2159   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2164 {
2165   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2166   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2167   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2168   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2169   PetscErrorCode ierr;
2170   PetscInt       ridx,cidx,bs2=a->bs2;
2171   PetscBool      roworiented=a->roworiented;
2172   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2173 
2174   PetscFunctionBegin;
2175   for (k=0; k<m; k++) { /* loop over added rows */
2176     row  = im[k];
2177     brow = row/bs;
2178     if (row < 0) continue;
2179 #if defined(PETSC_USE_DEBUG)
2180     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2181 #endif
2182     rp   = aj + ai[brow];
2183     if (!A->structure_only) ap = aa + bs2*ai[brow];
2184     rmax = imax[brow];
2185     nrow = ailen[brow];
2186     low  = 0;
2187     high = nrow;
2188     for (l=0; l<n; l++) { /* loop over added columns */
2189       if (in[l] < 0) continue;
2190 #if defined(PETSC_USE_DEBUG)
2191       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2192 #endif
2193       col  = in[l]; bcol = col/bs;
2194       ridx = row % bs; cidx = col % bs;
2195       if (!A->structure_only) {
2196         if (roworiented) {
2197           value = v[l + k*n];
2198         } else {
2199           value = v[k + l*m];
2200         }
2201       }
2202       if (col <= lastcol) low = 0; else high = nrow;
2203       lastcol = col;
2204       while (high-low > 7) {
2205         t = (low+high)/2;
2206         if (rp[t] > bcol) high = t;
2207         else              low  = t;
2208       }
2209       for (i=low; i<high; i++) {
2210         if (rp[i] > bcol) break;
2211         if (rp[i] == bcol) {
2212           bap = ap +  bs2*i + bs*cidx + ridx;
2213           if (!A->structure_only) {
2214             if (is == ADD_VALUES) *bap += value;
2215             else                  *bap  = value;
2216           }
2217           goto noinsert1;
2218         }
2219       }
2220       if (nonew == 1) goto noinsert1;
2221       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2222       if (A->structure_only) {
2223         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2224       } else {
2225         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2226       }
2227       N = nrow++ - 1; high++;
2228       /* shift up all the later entries in this row */
2229       for (ii=N; ii>=i; ii--) {
2230         rp[ii+1] = rp[ii];
2231         if (!A->structure_only) {
2232           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2233         }
2234       }
2235       if (N>=i && !A->structure_only) {
2236         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2237       }
2238       rp[i]                      = bcol;
2239       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2240       a->nz++;
2241       A->nonzerostate++;
2242 noinsert1:;
2243       low = i;
2244     }
2245     ailen[brow] = nrow;
2246   }
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2251 {
2252   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2253   Mat            outA;
2254   PetscErrorCode ierr;
2255   PetscBool      row_identity,col_identity;
2256 
2257   PetscFunctionBegin;
2258   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2259   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2260   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2261   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2262 
2263   outA            = inA;
2264   inA->factortype = MAT_FACTOR_LU;
2265   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2266   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2267 
2268   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2269 
2270   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2271   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2272   a->row = row;
2273   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2274   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2275   a->col = col;
2276 
2277   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2278   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2279   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2280   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2281 
2282   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2283   if (!a->solve_work) {
2284     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2285     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2286   }
2287   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2292 {
2293   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2294   PetscInt    i,nz,mbs;
2295 
2296   PetscFunctionBegin;
2297   nz  = baij->maxnz;
2298   mbs = baij->mbs;
2299   for (i=0; i<nz; i++) {
2300     baij->j[i] = indices[i];
2301   }
2302   baij->nz = nz;
2303   for (i=0; i<mbs; i++) {
2304     baij->ilen[i] = baij->imax[i];
2305   }
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 /*@
2310     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2311        in the matrix.
2312 
2313   Input Parameters:
2314 +  mat - the SeqBAIJ matrix
2315 -  indices - the column indices
2316 
2317   Level: advanced
2318 
2319   Notes:
2320     This can be called if you have precomputed the nonzero structure of the
2321   matrix and want to provide it to the matrix object to improve the performance
2322   of the MatSetValues() operation.
2323 
2324     You MUST have set the correct numbers of nonzeros per row in the call to
2325   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2326 
2327     MUST be called before any calls to MatSetValues();
2328 
2329 @*/
2330 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2331 {
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2336   PetscValidPointer(indices,2);
2337   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2342 {
2343   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2344   PetscErrorCode ierr;
2345   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2346   PetscReal      atmp;
2347   PetscScalar    *x,zero = 0.0;
2348   MatScalar      *aa;
2349   PetscInt       ncols,brow,krow,kcol;
2350 
2351   PetscFunctionBegin;
2352   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2353   bs  = A->rmap->bs;
2354   aa  = a->a;
2355   ai  = a->i;
2356   aj  = a->j;
2357   mbs = a->mbs;
2358 
2359   ierr = VecSet(v,zero);CHKERRQ(ierr);
2360   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2361   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2362   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2363   for (i=0; i<mbs; i++) {
2364     ncols = ai[1] - ai[0]; ai++;
2365     brow  = bs*i;
2366     for (j=0; j<ncols; j++) {
2367       for (kcol=0; kcol<bs; kcol++) {
2368         for (krow=0; krow<bs; krow++) {
2369           atmp = PetscAbsScalar(*aa);aa++;
2370           row  = brow + krow;   /* row index */
2371           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2372         }
2373       }
2374       aj++;
2375     }
2376   }
2377   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2382 {
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   /* If the two matrices have the same copy implementation, use fast copy. */
2387   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2388     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2389     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2390     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2391 
2392     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2393     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2394     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2395     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2396   } else {
2397     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2398   }
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2403 {
2404   PetscErrorCode ierr;
2405 
2406   PetscFunctionBegin;
2407   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2412 {
2413   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2414 
2415   PetscFunctionBegin;
2416   *array = a->a;
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2421 {
2422   PetscFunctionBegin;
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2427 {
2428   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2429   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2430   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2431   PetscErrorCode ierr;
2432 
2433   PetscFunctionBegin;
2434   /* Set the number of nonzeros in the new matrix */
2435   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2440 {
2441   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2442   PetscErrorCode ierr;
2443   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2444   PetscBLASInt   one=1;
2445 
2446   PetscFunctionBegin;
2447   if (str == SAME_NONZERO_PATTERN) {
2448     PetscScalar  alpha = a;
2449     PetscBLASInt bnz;
2450     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2451     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2452     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2453   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2454     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2455   } else {
2456     Mat      B;
2457     PetscInt *nnz;
2458     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2459     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2460     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2461     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2462     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2463     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2464     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2465     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2466     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2467     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2468     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2469     ierr = PetscFree(nnz);CHKERRQ(ierr);
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2475 {
2476   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2477   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2478   MatScalar   *aa = a->a;
2479 
2480   PetscFunctionBegin;
2481   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2486 {
2487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2488   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2489   MatScalar   *aa = a->a;
2490 
2491   PetscFunctionBegin;
2492   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 /*
2497     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2498 */
2499 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2500 {
2501   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2502   PetscErrorCode ierr;
2503   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2504   PetscInt       nz = a->i[m],row,*jj,mr,col;
2505 
2506   PetscFunctionBegin;
2507   *nn = n;
2508   if (!ia) PetscFunctionReturn(0);
2509   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2510   else {
2511     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2512     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2513     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2514     jj   = a->j;
2515     for (i=0; i<nz; i++) {
2516       collengths[jj[i]]++;
2517     }
2518     cia[0] = oshift;
2519     for (i=0; i<n; i++) {
2520       cia[i+1] = cia[i] + collengths[i];
2521     }
2522     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2523     jj   = a->j;
2524     for (row=0; row<m; row++) {
2525       mr = a->i[row+1] - a->i[row];
2526       for (i=0; i<mr; i++) {
2527         col = *jj++;
2528 
2529         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2530       }
2531     }
2532     ierr = PetscFree(collengths);CHKERRQ(ierr);
2533     *ia  = cia; *ja = cja;
2534   }
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2539 {
2540   PetscErrorCode ierr;
2541 
2542   PetscFunctionBegin;
2543   if (!ia) PetscFunctionReturn(0);
2544   ierr = PetscFree(*ia);CHKERRQ(ierr);
2545   ierr = PetscFree(*ja);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 /*
2550  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2551  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2552  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2553  */
2554 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2555 {
2556   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2557   PetscErrorCode ierr;
2558   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2559   PetscInt       nz = a->i[m],row,*jj,mr,col;
2560   PetscInt       *cspidx;
2561 
2562   PetscFunctionBegin;
2563   *nn = n;
2564   if (!ia) PetscFunctionReturn(0);
2565 
2566   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2567   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2568   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2569   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2570   jj   = a->j;
2571   for (i=0; i<nz; i++) {
2572     collengths[jj[i]]++;
2573   }
2574   cia[0] = oshift;
2575   for (i=0; i<n; i++) {
2576     cia[i+1] = cia[i] + collengths[i];
2577   }
2578   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2579   jj   = a->j;
2580   for (row=0; row<m; row++) {
2581     mr = a->i[row+1] - a->i[row];
2582     for (i=0; i<mr; i++) {
2583       col = *jj++;
2584       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2585       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2586     }
2587   }
2588   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2589   *ia    = cia; *ja = cja;
2590   *spidx = cspidx;
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2595 {
2596   PetscErrorCode ierr;
2597 
2598   PetscFunctionBegin;
2599   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2600   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2605 {
2606   PetscErrorCode ierr;
2607   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2608 
2609   PetscFunctionBegin;
2610   if (!Y->preallocated || !aij->nz) {
2611     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2612   }
2613   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 /* -------------------------------------------------------------------*/
2618 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2619                                        MatGetRow_SeqBAIJ,
2620                                        MatRestoreRow_SeqBAIJ,
2621                                        MatMult_SeqBAIJ_N,
2622                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2623                                        MatMultTranspose_SeqBAIJ,
2624                                        MatMultTransposeAdd_SeqBAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 10*/ 0,
2629                                        MatLUFactor_SeqBAIJ,
2630                                        0,
2631                                        0,
2632                                        MatTranspose_SeqBAIJ,
2633                                /* 15*/ MatGetInfo_SeqBAIJ,
2634                                        MatEqual_SeqBAIJ,
2635                                        MatGetDiagonal_SeqBAIJ,
2636                                        MatDiagonalScale_SeqBAIJ,
2637                                        MatNorm_SeqBAIJ,
2638                                /* 20*/ 0,
2639                                        MatAssemblyEnd_SeqBAIJ,
2640                                        MatSetOption_SeqBAIJ,
2641                                        MatZeroEntries_SeqBAIJ,
2642                                /* 24*/ MatZeroRows_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                /* 29*/ MatSetUp_SeqBAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                /* 34*/ MatDuplicate_SeqBAIJ,
2653                                        0,
2654                                        0,
2655                                        MatILUFactor_SeqBAIJ,
2656                                        0,
2657                                /* 39*/ MatAXPY_SeqBAIJ,
2658                                        MatCreateSubMatrices_SeqBAIJ,
2659                                        MatIncreaseOverlap_SeqBAIJ,
2660                                        MatGetValues_SeqBAIJ,
2661                                        MatCopy_SeqBAIJ,
2662                                /* 44*/ 0,
2663                                        MatScale_SeqBAIJ,
2664                                        MatShift_SeqBAIJ,
2665                                        0,
2666                                        MatZeroRowsColumns_SeqBAIJ,
2667                                /* 49*/ 0,
2668                                        MatGetRowIJ_SeqBAIJ,
2669                                        MatRestoreRowIJ_SeqBAIJ,
2670                                        MatGetColumnIJ_SeqBAIJ,
2671                                        MatRestoreColumnIJ_SeqBAIJ,
2672                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        MatSetValuesBlocked_SeqBAIJ,
2677                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2678                                        MatDestroy_SeqBAIJ,
2679                                        MatView_SeqBAIJ,
2680                                        0,
2681                                        0,
2682                                /* 64*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2688                                        0,
2689                                        MatConvert_Basic,
2690                                        0,
2691                                        0,
2692                                /* 74*/ 0,
2693                                        MatFDColoringApply_BAIJ,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 79*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        MatLoad_SeqBAIJ,
2702                                /* 84*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /* 89*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /* 94*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /* 99*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /*104*/ 0,
2723                                        MatRealPart_SeqBAIJ,
2724                                        MatImaginaryPart_SeqBAIJ,
2725                                        0,
2726                                        0,
2727                                /*109*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        MatMissingDiagonal_SeqBAIJ,
2732                                /*114*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                /*119*/ 0,
2738                                        0,
2739                                        MatMultHermitianTranspose_SeqBAIJ,
2740                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2741                                        0,
2742                                /*124*/ 0,
2743                                        0,
2744                                        MatInvertBlockDiagonal_SeqBAIJ,
2745                                        0,
2746                                        0,
2747                                /*129*/ 0,
2748                                        0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                /*134*/ 0,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                /*139*/ MatSetBlockSizes_Default,
2758                                        0,
2759                                        0,
2760                                        MatFDColoringSetUp_SeqXAIJ,
2761                                        0,
2762                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2763                                        MatDestroySubMatrices_SeqBAIJ
2764 };
2765 
2766 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2767 {
2768   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2769   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2770   PetscErrorCode ierr;
2771 
2772   PetscFunctionBegin;
2773   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2774 
2775   /* allocate space for values if not already there */
2776   if (!aij->saved_values) {
2777     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2778     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2779   }
2780 
2781   /* copy values over */
2782   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2787 {
2788   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2789   PetscErrorCode ierr;
2790   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2791 
2792   PetscFunctionBegin;
2793   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2794   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2795 
2796   /* copy values over */
2797   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2802 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2803 
2804 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2805 {
2806   Mat_SeqBAIJ    *b;
2807   PetscErrorCode ierr;
2808   PetscInt       i,mbs,nbs,bs2;
2809   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2810 
2811   PetscFunctionBegin;
2812   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2813   if (nz == MAT_SKIP_ALLOCATION) {
2814     skipallocation = PETSC_TRUE;
2815     nz             = 0;
2816   }
2817 
2818   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2819   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2821   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2822 
2823   B->preallocated = PETSC_TRUE;
2824 
2825   mbs = B->rmap->n/bs;
2826   nbs = B->cmap->n/bs;
2827   bs2 = bs*bs;
2828 
2829   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2830 
2831   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2832   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2833   if (nnz) {
2834     for (i=0; i<mbs; i++) {
2835       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2836       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2837     }
2838   }
2839 
2840   b    = (Mat_SeqBAIJ*)B->data;
2841   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2842   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2843   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2844 
2845   if (!flg) {
2846     switch (bs) {
2847     case 1:
2848       B->ops->mult    = MatMult_SeqBAIJ_1;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2850       break;
2851     case 2:
2852       B->ops->mult    = MatMult_SeqBAIJ_2;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2854       break;
2855     case 3:
2856       B->ops->mult    = MatMult_SeqBAIJ_3;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2858       break;
2859     case 4:
2860       B->ops->mult    = MatMult_SeqBAIJ_4;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2862       break;
2863     case 5:
2864       B->ops->mult    = MatMult_SeqBAIJ_5;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2866       break;
2867     case 6:
2868       B->ops->mult    = MatMult_SeqBAIJ_6;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2870       break;
2871     case 7:
2872       B->ops->mult    = MatMult_SeqBAIJ_7;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2874       break;
2875     case 9:
2876 #if defined(__INTEL_COMPILER) && (defined(__AVX2__) || defined (__AVX512F__))
2877       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2879 #else
2880       B->ops->mult    = MatMult_SeqBAIJ_N;
2881       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2882 #endif
2883       break;
2884     case 11:
2885       B->ops->mult    = MatMult_SeqBAIJ_11;
2886       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2887       break;
2888     case 15:
2889       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2890       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2891       break;
2892     default:
2893       B->ops->mult    = MatMult_SeqBAIJ_N;
2894       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2895       break;
2896     }
2897   }
2898   B->ops->sor = MatSOR_SeqBAIJ;
2899   b->mbs = mbs;
2900   b->nbs = nbs;
2901   if (!skipallocation) {
2902     if (!b->imax) {
2903       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2904       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2905 
2906       b->free_imax_ilen = PETSC_TRUE;
2907     }
2908     /* b->ilen will count nonzeros in each block row so far. */
2909     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2910     if (!nnz) {
2911       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2912       else if (nz < 0) nz = 1;
2913       for (i=0; i<mbs; i++) b->imax[i] = nz;
2914       nz = nz*mbs;
2915     } else {
2916       nz = 0;
2917       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2918     }
2919 
2920     /* allocate the matrix space */
2921     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2922     if (B->structure_only) {
2923       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2924       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2925       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2926     } else {
2927       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2928       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2929       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2930     }
2931     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2932 
2933     if (B->structure_only) {
2934       b->singlemalloc = PETSC_FALSE;
2935       b->free_a       = PETSC_FALSE;
2936     } else {
2937       b->singlemalloc = PETSC_TRUE;
2938       b->free_a       = PETSC_TRUE;
2939     }
2940     b->free_ij = PETSC_TRUE;
2941 
2942     b->i[0] = 0;
2943     for (i=1; i<mbs+1; i++) {
2944       b->i[i] = b->i[i-1] + b->imax[i-1];
2945     }
2946 
2947   } else {
2948     b->free_a  = PETSC_FALSE;
2949     b->free_ij = PETSC_FALSE;
2950   }
2951 
2952   b->bs2              = bs2;
2953   b->mbs              = mbs;
2954   b->nz               = 0;
2955   b->maxnz            = nz;
2956   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2957   B->was_assembled    = PETSC_FALSE;
2958   B->assembled        = PETSC_FALSE;
2959   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2964 {
2965   PetscInt       i,m,nz,nz_max=0,*nnz;
2966   PetscScalar    *values=0;
2967   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2968   PetscErrorCode ierr;
2969 
2970   PetscFunctionBegin;
2971   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2972   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2973   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2974   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2975   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2976   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2977   m    = B->rmap->n/bs;
2978 
2979   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2980   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2981   for (i=0; i<m; i++) {
2982     nz = ii[i+1]- ii[i];
2983     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2984     nz_max = PetscMax(nz_max, nz);
2985     nnz[i] = nz;
2986   }
2987   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2988   ierr = PetscFree(nnz);CHKERRQ(ierr);
2989 
2990   values = (PetscScalar*)V;
2991   if (!values) {
2992     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2993   }
2994   for (i=0; i<m; i++) {
2995     PetscInt          ncols  = ii[i+1] - ii[i];
2996     const PetscInt    *icols = jj + ii[i];
2997     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2998     if (!roworiented) {
2999       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3000     } else {
3001       PetscInt j;
3002       for (j=0; j<ncols; j++) {
3003         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3004         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3005       }
3006     }
3007   }
3008   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3009   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3010   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3011   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 /*MC
3016    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3017    block sparse compressed row format.
3018 
3019    Options Database Keys:
3020 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3021 
3022   Level: beginner
3023 
3024 .seealso: MatCreateSeqBAIJ()
3025 M*/
3026 
3027 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3028 
3029 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3030 {
3031   PetscErrorCode ierr;
3032   PetscMPIInt    size;
3033   Mat_SeqBAIJ    *b;
3034 
3035   PetscFunctionBegin;
3036   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3037   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3038 
3039   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3040   B->data = (void*)b;
3041   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3042 
3043   b->row          = 0;
3044   b->col          = 0;
3045   b->icol         = 0;
3046   b->reallocs     = 0;
3047   b->saved_values = 0;
3048 
3049   b->roworiented        = PETSC_TRUE;
3050   b->nonew              = 0;
3051   b->diag               = 0;
3052   B->spptr              = 0;
3053   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3054   b->keepnonzeropattern = PETSC_FALSE;
3055 
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3065 #if defined(PETSC_HAVE_HYPRE)
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3067 #endif
3068   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3073 {
3074   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3075   PetscErrorCode ierr;
3076   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3077 
3078   PetscFunctionBegin;
3079   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3080 
3081   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3082     c->imax           = a->imax;
3083     c->ilen           = a->ilen;
3084     c->free_imax_ilen = PETSC_FALSE;
3085   } else {
3086     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3087     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3088     for (i=0; i<mbs; i++) {
3089       c->imax[i] = a->imax[i];
3090       c->ilen[i] = a->ilen[i];
3091     }
3092     c->free_imax_ilen = PETSC_TRUE;
3093   }
3094 
3095   /* allocate the matrix space */
3096   if (mallocmatspace) {
3097     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3098       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3099       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3100 
3101       c->i            = a->i;
3102       c->j            = a->j;
3103       c->singlemalloc = PETSC_FALSE;
3104       c->free_a       = PETSC_TRUE;
3105       c->free_ij      = PETSC_FALSE;
3106       c->parent       = A;
3107       C->preallocated = PETSC_TRUE;
3108       C->assembled    = PETSC_TRUE;
3109 
3110       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3111       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3112       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3113     } else {
3114       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3115       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3116 
3117       c->singlemalloc = PETSC_TRUE;
3118       c->free_a       = PETSC_TRUE;
3119       c->free_ij      = PETSC_TRUE;
3120 
3121       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3122       if (mbs > 0) {
3123         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3124         if (cpvalues == MAT_COPY_VALUES) {
3125           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3126         } else {
3127           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3128         }
3129       }
3130       C->preallocated = PETSC_TRUE;
3131       C->assembled    = PETSC_TRUE;
3132     }
3133   }
3134 
3135   c->roworiented = a->roworiented;
3136   c->nonew       = a->nonew;
3137 
3138   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3139   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3140 
3141   c->bs2         = a->bs2;
3142   c->mbs         = a->mbs;
3143   c->nbs         = a->nbs;
3144 
3145   if (a->diag) {
3146     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3147       c->diag      = a->diag;
3148       c->free_diag = PETSC_FALSE;
3149     } else {
3150       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3151       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3152       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3153       c->free_diag = PETSC_TRUE;
3154     }
3155   } else c->diag = 0;
3156 
3157   c->nz         = a->nz;
3158   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3159   c->solve_work = NULL;
3160   c->mult_work  = NULL;
3161   c->sor_workt  = NULL;
3162   c->sor_work   = NULL;
3163 
3164   c->compressedrow.use   = a->compressedrow.use;
3165   c->compressedrow.nrows = a->compressedrow.nrows;
3166   if (a->compressedrow.use) {
3167     i    = a->compressedrow.nrows;
3168     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3169     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3170     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3171     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3172   } else {
3173     c->compressedrow.use    = PETSC_FALSE;
3174     c->compressedrow.i      = NULL;
3175     c->compressedrow.rindex = NULL;
3176   }
3177   C->nonzerostate = A->nonzerostate;
3178 
3179   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3180   PetscFunctionReturn(0);
3181 }
3182 
3183 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3184 {
3185   PetscErrorCode ierr;
3186 
3187   PetscFunctionBegin;
3188   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3189   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3190   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3191   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3192   PetscFunctionReturn(0);
3193 }
3194 
3195 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3196 {
3197   Mat_SeqBAIJ    *a;
3198   PetscErrorCode ierr;
3199   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3200   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3201   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3202   PetscInt       *masked,nmask,tmp,bs2,ishift;
3203   PetscMPIInt    size;
3204   int            fd;
3205   PetscScalar    *aa;
3206   MPI_Comm       comm;
3207 
3208   PetscFunctionBegin;
3209   /* force binary viewer to load .info file if it has not yet done so */
3210   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3211   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3212   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3213   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3214   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3215   if (bs < 0) bs = 1;
3216   bs2  = bs*bs;
3217 
3218   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3219   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3220   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3221   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3222   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3223   M = header[1]; N = header[2]; nz = header[3];
3224 
3225   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3226   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3227 
3228   /*
3229      This code adds extra rows to make sure the number of rows is
3230     divisible by the blocksize
3231   */
3232   mbs        = M/bs;
3233   extra_rows = bs - M + bs*(mbs);
3234   if (extra_rows == bs) extra_rows = 0;
3235   else mbs++;
3236   if (extra_rows) {
3237     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3238   }
3239 
3240   /* Set global sizes if not already set */
3241   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3242     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3243   } else { /* Check if the matrix global sizes are correct */
3244     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3245     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3246       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3247     }
3248     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3249   }
3250 
3251   /* read in row lengths */
3252   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3253   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3254   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3255 
3256   /* read in column indices */
3257   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3258   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3259   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3260 
3261   /* loop over row lengths determining block row lengths */
3262   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3263   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3264   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3265   rowcount = 0;
3266   nzcount  = 0;
3267   for (i=0; i<mbs; i++) {
3268     nmask = 0;
3269     for (j=0; j<bs; j++) {
3270       kmax = rowlengths[rowcount];
3271       for (k=0; k<kmax; k++) {
3272         tmp = jj[nzcount++]/bs;
3273         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3274       }
3275       rowcount++;
3276     }
3277     browlengths[i] += nmask;
3278     /* zero out the mask elements we set */
3279     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3280   }
3281 
3282   /* Do preallocation  */
3283   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3284   a    = (Mat_SeqBAIJ*)newmat->data;
3285 
3286   /* set matrix "i" values */
3287   a->i[0] = 0;
3288   for (i=1; i<= mbs; i++) {
3289     a->i[i]      = a->i[i-1] + browlengths[i-1];
3290     a->ilen[i-1] = browlengths[i-1];
3291   }
3292   a->nz = 0;
3293   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3294 
3295   /* read in nonzero values */
3296   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3297   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3298   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3299 
3300   /* set "a" and "j" values into matrix */
3301   nzcount = 0; jcount = 0;
3302   for (i=0; i<mbs; i++) {
3303     nzcountb = nzcount;
3304     nmask    = 0;
3305     for (j=0; j<bs; j++) {
3306       kmax = rowlengths[i*bs+j];
3307       for (k=0; k<kmax; k++) {
3308         tmp = jj[nzcount++]/bs;
3309         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3310       }
3311     }
3312     /* sort the masked values */
3313     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3314 
3315     /* set "j" values into matrix */
3316     maskcount = 1;
3317     for (j=0; j<nmask; j++) {
3318       a->j[jcount++]  = masked[j];
3319       mask[masked[j]] = maskcount++;
3320     }
3321     /* set "a" values into matrix */
3322     ishift = bs2*a->i[i];
3323     for (j=0; j<bs; j++) {
3324       kmax = rowlengths[i*bs+j];
3325       for (k=0; k<kmax; k++) {
3326         tmp       = jj[nzcountb]/bs;
3327         block     = mask[tmp] - 1;
3328         point     = jj[nzcountb] - bs*tmp;
3329         idx       = ishift + bs2*block + j + bs*point;
3330         a->a[idx] = (MatScalar)aa[nzcountb++];
3331       }
3332     }
3333     /* zero out the mask elements we set */
3334     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3335   }
3336   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3337 
3338   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3339   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3340   ierr = PetscFree(aa);CHKERRQ(ierr);
3341   ierr = PetscFree(jj);CHKERRQ(ierr);
3342   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3343 
3344   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3345   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3346   PetscFunctionReturn(0);
3347 }
3348 
3349 /*@C
3350    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3351    compressed row) format.  For good matrix assembly performance the
3352    user should preallocate the matrix storage by setting the parameter nz
3353    (or the array nnz).  By setting these parameters accurately, performance
3354    during matrix assembly can be increased by more than a factor of 50.
3355 
3356    Collective on MPI_Comm
3357 
3358    Input Parameters:
3359 +  comm - MPI communicator, set to PETSC_COMM_SELF
3360 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3361           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3362 .  m - number of rows
3363 .  n - number of columns
3364 .  nz - number of nonzero blocks  per block row (same for all rows)
3365 -  nnz - array containing the number of nonzero blocks in the various block rows
3366          (possibly different for each block row) or NULL
3367 
3368    Output Parameter:
3369 .  A - the matrix
3370 
3371    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3372    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3373    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3374 
3375    Options Database Keys:
3376 .   -mat_no_unroll - uses code that does not unroll the loops in the
3377                      block calculations (much slower)
3378 .    -mat_block_size - size of the blocks to use
3379 
3380    Level: intermediate
3381 
3382    Notes:
3383    The number of rows and columns must be divisible by blocksize.
3384 
3385    If the nnz parameter is given then the nz parameter is ignored
3386 
3387    A nonzero block is any block that as 1 or more nonzeros in it
3388 
3389    The block AIJ format is fully compatible with standard Fortran 77
3390    storage.  That is, the stored row and column indices can begin at
3391    either one (as in Fortran) or zero.  See the users' manual for details.
3392 
3393    Specify the preallocated storage with either nz or nnz (not both).
3394    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3395    allocation.  See Users-Manual: ch_mat for details.
3396    matrices.
3397 
3398 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3399 @*/
3400 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3401 {
3402   PetscErrorCode ierr;
3403 
3404   PetscFunctionBegin;
3405   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3406   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3407   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3408   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 /*@C
3413    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3414    per row in the matrix. For good matrix assembly performance the
3415    user should preallocate the matrix storage by setting the parameter nz
3416    (or the array nnz).  By setting these parameters accurately, performance
3417    during matrix assembly can be increased by more than a factor of 50.
3418 
3419    Collective on MPI_Comm
3420 
3421    Input Parameters:
3422 +  B - the matrix
3423 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3424           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3425 .  nz - number of block nonzeros per block row (same for all rows)
3426 -  nnz - array containing the number of block nonzeros in the various block rows
3427          (possibly different for each block row) or NULL
3428 
3429    Options Database Keys:
3430 .   -mat_no_unroll - uses code that does not unroll the loops in the
3431                      block calculations (much slower)
3432 .   -mat_block_size - size of the blocks to use
3433 
3434    Level: intermediate
3435 
3436    Notes:
3437    If the nnz parameter is given then the nz parameter is ignored
3438 
3439    You can call MatGetInfo() to get information on how effective the preallocation was;
3440    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3441    You can also run with the option -info and look for messages with the string
3442    malloc in them to see if additional memory allocation was needed.
3443 
3444    The block AIJ format is fully compatible with standard Fortran 77
3445    storage.  That is, the stored row and column indices can begin at
3446    either one (as in Fortran) or zero.  See the users' manual for details.
3447 
3448    Specify the preallocated storage with either nz or nnz (not both).
3449    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3450    allocation.  See Users-Manual: ch_mat for details.
3451 
3452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3453 @*/
3454 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3455 {
3456   PetscErrorCode ierr;
3457 
3458   PetscFunctionBegin;
3459   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3460   PetscValidType(B,1);
3461   PetscValidLogicalCollectiveInt(B,bs,2);
3462   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 /*@C
3467    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3468    (the default sequential PETSc format).
3469 
3470    Collective on MPI_Comm
3471 
3472    Input Parameters:
3473 +  B - the matrix
3474 .  i - the indices into j for the start of each local row (starts with zero)
3475 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3476 -  v - optional values in the matrix
3477 
3478    Level: developer
3479 
3480    Notes:
3481    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3482    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3483    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3484    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3485    block column and the second index is over columns within a block.
3486 
3487 .keywords: matrix, aij, compressed row, sparse
3488 
3489 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3490 @*/
3491 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3492 {
3493   PetscErrorCode ierr;
3494 
3495   PetscFunctionBegin;
3496   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3497   PetscValidType(B,1);
3498   PetscValidLogicalCollectiveInt(B,bs,2);
3499   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 
3504 /*@
3505      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3506 
3507      Collective on MPI_Comm
3508 
3509    Input Parameters:
3510 +  comm - must be an MPI communicator of size 1
3511 .  bs - size of block
3512 .  m - number of rows
3513 .  n - number of columns
3514 .  i - row indices
3515 .  j - column indices
3516 -  a - matrix values
3517 
3518    Output Parameter:
3519 .  mat - the matrix
3520 
3521    Level: advanced
3522 
3523    Notes:
3524        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3525     once the matrix is destroyed
3526 
3527        You cannot set new nonzero locations into this matrix, that will generate an error.
3528 
3529        The i and j indices are 0 based
3530 
3531        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3532 
3533       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3534       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3535       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3536       with column-major ordering within blocks.
3537 
3538 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3539 
3540 @*/
3541 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3542 {
3543   PetscErrorCode ierr;
3544   PetscInt       ii;
3545   Mat_SeqBAIJ    *baij;
3546 
3547   PetscFunctionBegin;
3548   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3549   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3550 
3551   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3552   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3553   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3554   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3555   baij = (Mat_SeqBAIJ*)(*mat)->data;
3556   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3557   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3558 
3559   baij->i = i;
3560   baij->j = j;
3561   baij->a = a;
3562 
3563   baij->singlemalloc = PETSC_FALSE;
3564   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3565   baij->free_a       = PETSC_FALSE;
3566   baij->free_ij      = PETSC_FALSE;
3567 
3568   for (ii=0; ii<m; ii++) {
3569     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3570 #if defined(PETSC_USE_DEBUG)
3571     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3572 #endif
3573   }
3574 #if defined(PETSC_USE_DEBUG)
3575   for (ii=0; ii<baij->i[m]; ii++) {
3576     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3577     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3578   }
3579 #endif
3580 
3581   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3582   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3583   PetscFunctionReturn(0);
3584 }
3585 
3586 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3587 {
3588   PetscErrorCode ierr;
3589   PetscMPIInt    size;
3590 
3591   PetscFunctionBegin;
3592   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3593   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3594     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3595   } else {
3596     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3597   }
3598   PetscFunctionReturn(0);
3599 }
3600