xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision 72ce74bd30d5d09d1371a3eb387704f4af395d1a)
1 
2 #include "src/mat/impls/baij/seq/baij.h"
3 #include "src/inline/spops.h"
4 #include "src/inline/ilu.h"
5 #include "petscbt.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,int is_max,IS is[],int ov)
10 {
11   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
12   int         row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival;
13   int         start,end,*ai,*aj,bs,*nidx2;
14   PetscBT     table;
15 
16   PetscFunctionBegin;
17   m     = a->mbs;
18   ai    = a->i;
19   aj    = a->j;
20   bs    = a->bs;
21 
22   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
23 
24   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
25   ierr = PetscMalloc((m+1)*sizeof(int),&nidx);CHKERRQ(ierr);
26   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
27 
28   for (i=0; i<is_max; i++) {
29     /* Initialise the two local arrays */
30     isz  = 0;
31     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
32 
33     /* Extract the indices, assume there can be duplicate entries */
34     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36 
37     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
38     for (j=0; j<n ; ++j){
39       ival = idx[j]/bs; /* convert the indices into block indices */
40       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
41       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
42     }
43     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
44     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
45 
46     k = 0;
47     for (j=0; j<ov; j++){ /* for each overlap*/
48       n = isz;
49       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
50         row   = nidx[k];
51         start = ai[row];
52         end   = ai[row+1];
53         for (l = start; l<end ; l++){
54           val = aj[l];
55           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
56         }
57       }
58     }
59     /* expand the Index Set */
60     for (j=0; j<isz; j++) {
61       for (k=0; k<bs; k++)
62         nidx2[j*bs+k] = nidx[j]*bs+k;
63     }
64     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
65   }
66   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
67   ierr = PetscFree(nidx);CHKERRQ(ierr);
68   ierr = PetscFree(nidx2);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
74 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
75 {
76   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data,*c;
77   int          *smap,i,k,kstart,kend,ierr,oldcols = a->nbs,*lens;
78   int          row,mat_i,*mat_j,tcol,*mat_ilen;
79   int          *irow,*icol,nrows,ncols,*ssmap,bs=a->bs,bs2=a->bs2;
80   int          *aj = a->j,*ai = a->i;
81   MatScalar    *mat_a;
82   Mat          C;
83   PetscTruth   flag;
84 
85   PetscFunctionBegin;
86   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
87   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
88 
89   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
90   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
91   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
92   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
93 
94   ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
95   ssmap = smap;
96   ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
97   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
98   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
99   /* determine lens of each row */
100   for (i=0; i<nrows; i++) {
101     kstart  = ai[irow[i]];
102     kend    = kstart + a->ilen[irow[i]];
103     lens[i] = 0;
104       for (k=kstart; k<kend; k++) {
105         if (ssmap[aj[k]]) {
106           lens[i]++;
107         }
108       }
109     }
110   /* Create and fill new matrix */
111   if (scall == MAT_REUSE_MATRIX) {
112     c = (Mat_SeqBAIJ *)((*B)->data);
113 
114     if (c->mbs!=nrows || c->nbs!=ncols || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
115     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
116     if (flag == PETSC_FALSE) {
117       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
118     }
119     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
120     C = *B;
121   } else {
122     ierr = MatCreate(A->comm,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr);
123     ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
124     ierr = MatSeqBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr);
125   }
126   c = (Mat_SeqBAIJ *)(C->data);
127   for (i=0; i<nrows; i++) {
128     row    = irow[i];
129     kstart = ai[row];
130     kend   = kstart + a->ilen[row];
131     mat_i  = c->i[i];
132     mat_j  = c->j + mat_i;
133     mat_a  = c->a + mat_i*bs2;
134     mat_ilen = c->ilen + i;
135     for (k=kstart; k<kend; k++) {
136       if ((tcol=ssmap[a->j[k]])) {
137         *mat_j++ = tcol - 1;
138         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
139         mat_a   += bs2;
140         (*mat_ilen)++;
141       }
142     }
143   }
144 
145   /* Free work space */
146   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
147   ierr = PetscFree(smap);CHKERRQ(ierr);
148   ierr = PetscFree(lens);CHKERRQ(ierr);
149   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
150   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
151 
152   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
153   *B = C;
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
159 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
160 {
161   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
162   IS          is1,is2;
163   int         *vary,*iary,*irow,*icol,nrows,ncols,i,ierr,bs=a->bs,count;
164 
165   PetscFunctionBegin;
166   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
168   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
169   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
170 
171   /* Verify if the indices corespond to each element in a block
172    and form the IS with compressed IS */
173   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
174   iary = vary + a->mbs;
175   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
176   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
177   count = 0;
178   for (i=0; i<a->mbs; i++) {
179     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
180     if (vary[i]==bs) iary[count++] = i;
181   }
182   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
183 
184   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
185   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
186   count = 0;
187   for (i=0; i<a->mbs; i++) {
188     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
189     if (vary[i]==bs) iary[count++] = i;
190   }
191   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
192   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
193   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
194   ierr = PetscFree(vary);CHKERRQ(ierr);
195 
196   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
197   ISDestroy(is1);
198   ISDestroy(is2);
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
204 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
205 {
206   PetscErrorCode ierr,i;
207 
208   PetscFunctionBegin;
209   if (scall == MAT_INITIAL_MATRIX) {
210     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
211   }
212 
213   for (i=0; i<n; i++) {
214     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
215   }
216   PetscFunctionReturn(0);
217 }
218 
219 
220 /* -------------------------------------------------------*/
221 /* Should check that shapes of vectors and matrices match */
222 /* -------------------------------------------------------*/
223 #include "petscblaslapack.h"
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatMult_SeqBAIJ_1"
227 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
228 {
229   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
230   PetscScalar     *x,*z,sum;
231   MatScalar       *v;
232   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
233 
234   PetscFunctionBegin;
235   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
236   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
237 
238   idx   = a->j;
239   v     = a->a;
240   ii    = a->i;
241 
242   for (i=0; i<mbs; i++) {
243     n    = ii[1] - ii[0]; ii++;
244     sum  = 0.0;
245     while (n--) sum += *v++ * x[*idx++];
246     z[i] = sum;
247   }
248   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
249   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
250   PetscLogFlops(2*a->nz - A->m);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatMult_SeqBAIJ_2"
256 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
257 {
258   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
259   PetscScalar     *x,*z,*xb,sum1,sum2;
260   PetscScalar     x1,x2;
261   MatScalar       *v;
262   PetscErrorCode ierr;
263   int mbs=a->mbs,i,*idx,*ii,j,n;
264 
265   PetscFunctionBegin;
266   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
267   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
268 
269   idx   = a->j;
270   v     = a->a;
271   ii    = a->i;
272 
273   for (i=0; i<mbs; i++) {
274     n  = ii[1] - ii[0]; ii++;
275     sum1 = 0.0; sum2 = 0.0;
276     for (j=0; j<n; j++) {
277       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
278       sum1 += v[0]*x1 + v[2]*x2;
279       sum2 += v[1]*x1 + v[3]*x2;
280       v += 4;
281     }
282     z[0] = sum1; z[1] = sum2;
283     z += 2;
284   }
285   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
286   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
287   PetscLogFlops(8*a->nz - A->m);
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatMult_SeqBAIJ_3"
293 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
294 {
295   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
296   PetscScalar  *x,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
297   MatScalar    *v;
298   PetscErrorCode ierr;
299   int  mbs=a->mbs,i,*idx,*ii,j,n;
300 
301 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
302 #pragma disjoint(*v,*z,*xb)
303 #endif
304 
305   PetscFunctionBegin;
306   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
307   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
308 
309   idx   = a->j;
310   v     = a->a;
311   ii    = a->i;
312 
313   for (i=0; i<mbs; i++) {
314     n  = ii[1] - ii[0]; ii++;
315     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
316     for (j=0; j<n; j++) {
317       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
318       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
319       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
320       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
321       v += 9;
322     }
323     z[0] = sum1; z[1] = sum2; z[2] = sum3;
324     z += 3;
325   }
326   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
327   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
328   PetscLogFlops(18*a->nz - A->m);
329   PetscFunctionReturn(0);
330 }
331 
332 #undef __FUNCT__
333 #define __FUNCT__ "MatMult_SeqBAIJ_4"
334 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
335 {
336   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
337   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
338   MatScalar       *v;
339   PetscErrorCode ierr;
340   int mbs=a->mbs,i,*idx,*ii,j,n;
341 
342   PetscFunctionBegin;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
345 
346   idx   = a->j;
347   v     = a->a;
348   ii    = a->i;
349 
350   for (i=0; i<mbs; i++) {
351     n  = ii[1] - ii[0]; ii++;
352     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
353     for (j=0; j<n; j++) {
354       xb = x + 4*(*idx++);
355       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
356       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
357       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
358       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
359       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
360       v += 16;
361     }
362     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
363     z += 4;
364   }
365   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
366   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
367   PetscLogFlops(32*a->nz - A->m);
368   PetscFunctionReturn(0);
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "MatMult_SeqBAIJ_5"
373 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
374 {
375   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
376   PetscScalar     sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z,*x;
377   MatScalar       *v;
378   PetscErrorCode ierr;
379   int mbs=a->mbs,i,*idx,*ii,j,n;
380 
381   PetscFunctionBegin;
382   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
383   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
384 
385   idx   = a->j;
386   v     = a->a;
387   ii    = a->i;
388 
389   for (i=0; i<mbs; i++) {
390     n  = ii[1] - ii[0]; ii++;
391     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
392     for (j=0; j<n; j++) {
393       xb = x + 5*(*idx++);
394       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
395       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
396       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
397       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
398       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
399       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
400       v += 25;
401     }
402     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
403     z += 5;
404   }
405   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
406   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
407   PetscLogFlops(50*a->nz - A->m);
408   PetscFunctionReturn(0);
409 }
410 
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatMult_SeqBAIJ_6"
414 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
415 {
416   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
417   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
418   PetscScalar     x1,x2,x3,x4,x5,x6;
419   MatScalar       *v;
420   PetscErrorCode ierr;
421   int   mbs=a->mbs,i,*idx,*ii,j,n;
422 
423   PetscFunctionBegin;
424   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
426 
427   idx   = a->j;
428   v     = a->a;
429   ii    = a->i;
430 
431   for (i=0; i<mbs; i++) {
432     n  = ii[1] - ii[0]; ii++;
433     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
434     for (j=0; j<n; j++) {
435       xb = x + 6*(*idx++);
436       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
437       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
438       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
439       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
440       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
441       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
442       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
443       v += 36;
444     }
445     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
446     z += 6;
447   }
448 
449   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
451   PetscLogFlops(72*a->nz - A->m);
452   PetscFunctionReturn(0);
453 }
454 #undef __FUNCT__
455 #define __FUNCT__ "MatMult_SeqBAIJ_7"
456 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
457 {
458   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
459   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
460   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
461   MatScalar       *v;
462   PetscErrorCode ierr;
463   int mbs=a->mbs,i,*idx,*ii,j,n;
464 
465   PetscFunctionBegin;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
468 
469   idx   = a->j;
470   v     = a->a;
471   ii    = a->i;
472 
473   for (i=0; i<mbs; i++) {
474     n  = ii[1] - ii[0]; ii++;
475     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
476     for (j=0; j<n; j++) {
477       xb = x + 7*(*idx++);
478       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
479       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
480       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
481       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
482       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
483       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
484       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
485       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
486       v += 49;
487     }
488     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
489     z += 7;
490   }
491 
492   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
493   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
494   PetscLogFlops(98*a->nz - A->m);
495   PetscFunctionReturn(0);
496 }
497 
498 /*
499     This will not work with MatScalar == float because it calls the BLAS
500 */
501 #undef __FUNCT__
502 #define __FUNCT__ "MatMult_SeqBAIJ_N"
503 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
504 {
505   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
506   PetscScalar     *x,*z,*xb,*work,*workt;
507   MatScalar       *v;
508   PetscErrorCode ierr;
509   int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
510   int             ncols,k;
511 
512   PetscFunctionBegin;
513   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
514   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
515 
516   idx   = a->j;
517   v     = a->a;
518   ii    = a->i;
519 
520 
521   if (!a->mult_work) {
522     k    = PetscMax(A->m,A->n);
523     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
524   }
525   work = a->mult_work;
526   for (i=0; i<mbs; i++) {
527     n     = ii[1] - ii[0]; ii++;
528     ncols = n*bs;
529     workt = work;
530     for (j=0; j<n; j++) {
531       xb = x + bs*(*idx++);
532       for (k=0; k<bs; k++) workt[k] = xb[k];
533       workt += bs;
534     }
535     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
536     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
537     v += n*bs2;
538     z += bs;
539   }
540   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
541   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
542   PetscLogFlops(2*a->nz*bs2 - A->m);
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
548 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
551   PetscScalar     *x,*y,*z,sum;
552   MatScalar       *v;
553   PetscErrorCode ierr;
554   int mbs=a->mbs,i,*idx,*ii,n;
555 
556   PetscFunctionBegin;
557   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
558   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
559   if (zz != yy) {
560     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
561   } else {
562     z = y;
563   }
564 
565   idx   = a->j;
566   v     = a->a;
567   ii    = a->i;
568 
569   for (i=0; i<mbs; i++) {
570     n    = ii[1] - ii[0]; ii++;
571     sum  = y[i];
572     while (n--) sum += *v++ * x[*idx++];
573     z[i] = sum;
574   }
575   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
576   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
577   if (zz != yy) {
578     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
579   }
580   PetscLogFlops(2*a->nz);
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
586 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
587 {
588   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
589   PetscScalar     *x,*y,*z,*xb,sum1,sum2;
590   PetscScalar     x1,x2;
591   MatScalar       *v;
592   PetscErrorCode ierr;
593   int mbs=a->mbs,i,*idx,*ii,j,n;
594 
595   PetscFunctionBegin;
596   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
597   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
598   if (zz != yy) {
599     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
600   } else {
601     z = y;
602   }
603 
604   idx   = a->j;
605   v     = a->a;
606   ii    = a->i;
607 
608   for (i=0; i<mbs; i++) {
609     n  = ii[1] - ii[0]; ii++;
610     sum1 = y[0]; sum2 = y[1];
611     for (j=0; j<n; j++) {
612       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
613       sum1 += v[0]*x1 + v[2]*x2;
614       sum2 += v[1]*x1 + v[3]*x2;
615       v += 4;
616     }
617     z[0] = sum1; z[1] = sum2;
618     z += 2; y += 2;
619   }
620   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
621   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
622   if (zz != yy) {
623     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
624   }
625   PetscLogFlops(4*a->nz);
626   PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
631 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
632 {
633   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
634   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
635   MatScalar       *v;
636   PetscErrorCode ierr;
637   int mbs=a->mbs,i,*idx,*ii,j,n;
638 
639   PetscFunctionBegin;
640   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
641   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
642   if (zz != yy) {
643     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
644   } else {
645     z = y;
646   }
647 
648   idx   = a->j;
649   v     = a->a;
650   ii    = a->i;
651 
652   for (i=0; i<mbs; i++) {
653     n  = ii[1] - ii[0]; ii++;
654     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
655     for (j=0; j<n; j++) {
656       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
657       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
658       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
659       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
660       v += 9;
661     }
662     z[0] = sum1; z[1] = sum2; z[2] = sum3;
663     z += 3; y += 3;
664   }
665   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
666   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
667   if (zz != yy) {
668     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
669   }
670   PetscLogFlops(18*a->nz);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
676 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
679   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
680   MatScalar       *v;
681   PetscErrorCode ierr;
682   int  mbs=a->mbs,i,*idx,*ii;
683   int             j,n;
684 
685   PetscFunctionBegin;
686   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
687   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
688   if (zz != yy) {
689     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
690   } else {
691     z = y;
692   }
693 
694   idx   = a->j;
695   v     = a->a;
696   ii    = a->i;
697 
698   for (i=0; i<mbs; i++) {
699     n  = ii[1] - ii[0]; ii++;
700     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
701     for (j=0; j<n; j++) {
702       xb = x + 4*(*idx++);
703       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
704       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
705       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
706       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
707       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
708       v += 16;
709     }
710     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
711     z += 4; y += 4;
712   }
713   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
714   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
715   if (zz != yy) {
716     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
717   }
718   PetscLogFlops(32*a->nz);
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
724 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
725 {
726   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
727   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
728   MatScalar       *v;
729   PetscErrorCode ierr;
730   int mbs=a->mbs,i,*idx,*ii,j,n;
731 
732   PetscFunctionBegin;
733   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
734   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
735   if (zz != yy) {
736     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
737   } else {
738     z = y;
739   }
740 
741   idx   = a->j;
742   v     = a->a;
743   ii    = a->i;
744 
745   for (i=0; i<mbs; i++) {
746     n  = ii[1] - ii[0]; ii++;
747     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
748     for (j=0; j<n; j++) {
749       xb = x + 5*(*idx++);
750       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
751       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
752       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
753       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
754       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
755       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
756       v += 25;
757     }
758     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
759     z += 5; y += 5;
760   }
761   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
762   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
763   if (zz != yy) {
764     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
765   }
766   PetscLogFlops(50*a->nz);
767   PetscFunctionReturn(0);
768 }
769 #undef __FUNCT__
770 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
771 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
772 {
773   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
774   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
775   PetscScalar     x1,x2,x3,x4,x5,x6;
776   MatScalar       *v;
777   PetscErrorCode ierr;
778   int mbs=a->mbs,i,*idx,*ii,j,n;
779 
780   PetscFunctionBegin;
781   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
782   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
783   if (zz != yy) {
784     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
785   } else {
786     z = y;
787   }
788 
789   idx   = a->j;
790   v     = a->a;
791   ii    = a->i;
792 
793   for (i=0; i<mbs; i++) {
794     n  = ii[1] - ii[0]; ii++;
795     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
796     for (j=0; j<n; j++) {
797       xb = x + 6*(*idx++);
798       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
799       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
800       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
801       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
802       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
803       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
804       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
805       v += 36;
806     }
807     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
808     z += 6; y += 6;
809   }
810   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
811   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
812   if (zz != yy) {
813     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
814   }
815   PetscLogFlops(72*a->nz);
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
821 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
822 {
823   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
824   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
825   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
826   MatScalar       *v;
827   PetscErrorCode ierr;
828   int mbs=a->mbs,i,*idx,*ii,j,n;
829 
830   PetscFunctionBegin;
831   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
832   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
833   if (zz != yy) {
834     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
835   } else {
836     z = y;
837   }
838 
839   idx   = a->j;
840   v     = a->a;
841   ii    = a->i;
842 
843   for (i=0; i<mbs; i++) {
844     n  = ii[1] - ii[0]; ii++;
845     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
846     for (j=0; j<n; j++) {
847       xb = x + 7*(*idx++);
848       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
849       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
850       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
851       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
852       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
853       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
854       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
855       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
856       v += 49;
857     }
858     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
859     z += 7; y += 7;
860   }
861   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
862   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
863   if (zz != yy) {
864     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
865   }
866   PetscLogFlops(98*a->nz);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
872 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
873 {
874   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
875   PetscScalar    *x,*z,*xb,*work,*workt,*y;
876   MatScalar      *v;
877   int            mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
878   int            ncols,k;
879 
880   PetscFunctionBegin;
881   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
883   if (zz != yy) {
884     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
885     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
886     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
887   }
888 
889   idx   = a->j;
890   v     = a->a;
891   ii    = a->i;
892 
893 
894   if (!a->mult_work) {
895     k    = PetscMax(A->m,A->n);
896     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
897   }
898   work = a->mult_work;
899   for (i=0; i<mbs; i++) {
900     n     = ii[1] - ii[0]; ii++;
901     ncols = n*bs;
902     workt = work;
903     for (j=0; j<n; j++) {
904       xb = x + bs*(*idx++);
905       for (k=0; k<bs; k++) workt[k] = xb[k];
906       workt += bs;
907     }
908     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
909     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
910     v += n*bs2;
911     z += bs;
912   }
913   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
914   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
915   PetscLogFlops(2*a->nz*bs2);
916   PetscFunctionReturn(0);
917 }
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
921 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
922 {
923   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
924   PetscScalar     *xg,*zg,*zb,zero = 0.0;
925   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
926   MatScalar       *v;
927   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
928   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
929 
930   PetscFunctionBegin;
931   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
932   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
933   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
934 
935   idx   = a->j;
936   v     = a->a;
937   ii    = a->i;
938   xb    = x;
939   switch (bs) {
940   case 1:
941     for (i=0; i<mbs; i++) {
942       n  = ii[1] - ii[0]; ii++;
943       x1 = xb[0];
944       ib = idx + ai[i];
945       for (j=0; j<n; j++) {
946         rval    = ib[j];
947         z[rval] += *v * x1;
948         v++;
949       }
950       xb++;
951     }
952     break;
953   case 2:
954     for (i=0; i<mbs; i++) {
955       n  = ii[1] - ii[0]; ii++;
956       x1 = xb[0]; x2 = xb[1];
957       ib = idx + ai[i];
958       for (j=0; j<n; j++) {
959         rval      = ib[j]*2;
960         z[rval++] += v[0]*x1 + v[1]*x2;
961         z[rval]   += v[2]*x1 + v[3]*x2;
962         v  += 4;
963       }
964       xb += 2;
965     }
966     break;
967   case 3:
968     for (i=0; i<mbs; i++) {
969       n  = ii[1] - ii[0]; ii++;
970       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
971       ib = idx + ai[i];
972       for (j=0; j<n; j++) {
973         rval      = ib[j]*3;
974         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
975         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
976         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
977         v  += 9;
978       }
979       xb += 3;
980     }
981     break;
982   case 4:
983     for (i=0; i<mbs; i++) {
984       n  = ii[1] - ii[0]; ii++;
985       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
986       ib = idx + ai[i];
987       for (j=0; j<n; j++) {
988         rval      = ib[j]*4;
989         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
990         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
991         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
992         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
993         v  += 16;
994       }
995       xb += 4;
996     }
997     break;
998   case 5:
999     for (i=0; i<mbs; i++) {
1000       n  = ii[1] - ii[0]; ii++;
1001       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1002       x4 = xb[3]; x5 = xb[4];
1003       ib = idx + ai[i];
1004       for (j=0; j<n; j++) {
1005         rval      = ib[j]*5;
1006         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1007         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1008         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1009         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1010         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1011         v  += 25;
1012       }
1013       xb += 5;
1014     }
1015     break;
1016   case 6:
1017     for (i=0; i<mbs; i++) {
1018       n  = ii[1] - ii[0]; ii++;
1019       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1020       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1021       ib = idx + ai[i];
1022       for (j=0; j<n; j++) {
1023         rval      = ib[j]*6;
1024         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
1025         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
1026         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
1027         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
1028         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
1029         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
1030         v  += 36;
1031       }
1032       xb += 6;
1033     }
1034     break;
1035   case 7:
1036     for (i=0; i<mbs; i++) {
1037       n  = ii[1] - ii[0]; ii++;
1038       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1039       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1040       ib = idx + ai[i];
1041       for (j=0; j<n; j++) {
1042         rval      = ib[j]*7;
1043         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
1044         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1045         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1046         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1047         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1048         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1049         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1050         v  += 49;
1051       }
1052       xb += 7;
1053     }
1054     break;
1055   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
1056       int          ncols,k;
1057       PetscScalar  *work,*workt;
1058 
1059       if (!a->mult_work) {
1060         k = PetscMax(A->m,A->n);
1061         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1062       }
1063       work = a->mult_work;
1064       for (i=0; i<mbs; i++) {
1065         n     = ii[1] - ii[0]; ii++;
1066         ncols = n*bs;
1067         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1068         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1069         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1070         v += n*bs2;
1071         x += bs;
1072         workt = work;
1073         for (j=0; j<n; j++) {
1074           zb = z + bs*(*idx++);
1075           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1076           workt += bs;
1077         }
1078       }
1079     }
1080   }
1081   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1082   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1083   PetscLogFlops(2*a->nz*a->bs2 - A->n);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1089 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1090 
1091 {
1092   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1093   PetscScalar     *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
1094   MatScalar       *v;
1095   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1096 
1097   PetscFunctionBegin;
1098   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1099   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
1100   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
1101 
1102 
1103   idx   = a->j;
1104   v     = a->a;
1105   ii    = a->i;
1106   xb    = x;
1107 
1108   switch (bs) {
1109   case 1:
1110     for (i=0; i<mbs; i++) {
1111       n  = ii[1] - ii[0]; ii++;
1112       x1 = xb[0];
1113       ib = idx + ai[i];
1114       for (j=0; j<n; j++) {
1115         rval    = ib[j];
1116         z[rval] += *v * x1;
1117         v++;
1118       }
1119       xb++;
1120     }
1121     break;
1122   case 2:
1123     for (i=0; i<mbs; i++) {
1124       n  = ii[1] - ii[0]; ii++;
1125       x1 = xb[0]; x2 = xb[1];
1126       ib = idx + ai[i];
1127       for (j=0; j<n; j++) {
1128         rval      = ib[j]*2;
1129         z[rval++] += v[0]*x1 + v[1]*x2;
1130         z[rval++] += v[2]*x1 + v[3]*x2;
1131         v  += 4;
1132       }
1133       xb += 2;
1134     }
1135     break;
1136   case 3:
1137     for (i=0; i<mbs; i++) {
1138       n  = ii[1] - ii[0]; ii++;
1139       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1140       ib = idx + ai[i];
1141       for (j=0; j<n; j++) {
1142         rval      = ib[j]*3;
1143         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1144         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1145         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1146         v  += 9;
1147       }
1148       xb += 3;
1149     }
1150     break;
1151   case 4:
1152     for (i=0; i<mbs; i++) {
1153       n  = ii[1] - ii[0]; ii++;
1154       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1155       ib = idx + ai[i];
1156       for (j=0; j<n; j++) {
1157         rval      = ib[j]*4;
1158         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1159         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1160         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1161         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1162         v  += 16;
1163       }
1164       xb += 4;
1165     }
1166     break;
1167   case 5:
1168     for (i=0; i<mbs; i++) {
1169       n  = ii[1] - ii[0]; ii++;
1170       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1171       x4 = xb[3]; x5 = xb[4];
1172       ib = idx + ai[i];
1173       for (j=0; j<n; j++) {
1174         rval      = ib[j]*5;
1175         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1176         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1177         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1178         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1179         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1180         v  += 25;
1181       }
1182       xb += 5;
1183     }
1184     break;
1185   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1186       int          ncols,k;
1187       PetscScalar  *work,*workt;
1188 
1189       if (!a->mult_work) {
1190         k = PetscMax(A->m,A->n);
1191         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1192       }
1193       work = a->mult_work;
1194       for (i=0; i<mbs; i++) {
1195         n     = ii[1] - ii[0]; ii++;
1196         ncols = n*bs;
1197         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1198         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1199         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1200         v += n*bs2;
1201         x += bs;
1202         workt = work;
1203         for (j=0; j<n; j++) {
1204           zb = z + bs*(*idx++);
1205           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1206           workt += bs;
1207         }
1208       }
1209     }
1210   }
1211   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1212   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1213   PetscLogFlops(2*a->nz*a->bs2);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatScale_SeqBAIJ"
1219 PetscErrorCode MatScale_SeqBAIJ(const PetscScalar *alpha,Mat inA)
1220 {
1221   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1222   int         totalnz = a->bs2*a->nz;
1223 #if defined(PETSC_USE_MAT_SINGLE)
1224   int         i;
1225 #else
1226   int         one = 1;
1227 #endif
1228 
1229   PetscFunctionBegin;
1230 #if defined(PETSC_USE_MAT_SINGLE)
1231   for (i=0; i<totalnz; i++) a->a[i] *= *alpha;
1232 #else
1233   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1234 #endif
1235   PetscLogFlops(totalnz);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatNorm_SeqBAIJ"
1241 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1242 {
1243   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1244   MatScalar   *v = a->a;
1245   PetscReal   sum = 0.0;
1246   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
1247 
1248   PetscFunctionBegin;
1249   if (type == NORM_FROBENIUS) {
1250     for (i=0; i< bs2*nz; i++) {
1251 #if defined(PETSC_USE_COMPLEX)
1252       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1253 #else
1254       sum += (*v)*(*v); v++;
1255 #endif
1256     }
1257     *norm = sqrt(sum);
1258   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1259     *norm = 0.0;
1260     for (k=0; k<bs; k++) {
1261       for (j=0; j<a->mbs; j++) {
1262         v = a->a + bs2*a->i[j] + k;
1263         sum = 0.0;
1264         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1265           for (k1=0; k1<bs; k1++){
1266             sum += PetscAbsScalar(*v);
1267             v   += bs;
1268           }
1269         }
1270         if (sum > *norm) *norm = sum;
1271       }
1272     }
1273   } else {
1274     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatEqual_SeqBAIJ"
1282 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1283 {
1284   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1289   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1290     *flg = PETSC_FALSE;
1291     PetscFunctionReturn(0);
1292   }
1293 
1294   /* if the a->i are the same */
1295   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
1296   if (*flg == PETSC_FALSE) {
1297     PetscFunctionReturn(0);
1298   }
1299 
1300   /* if a->j are the same */
1301   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
1302   if (*flg == PETSC_FALSE) {
1303     PetscFunctionReturn(0);
1304   }
1305   /* if a->a are the same */
1306   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1313 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1314 {
1315   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317   int  i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1318   PetscScalar  *x,zero = 0.0;
1319   MatScalar    *aa,*aa_j;
1320 
1321   PetscFunctionBegin;
1322   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1323   bs   = a->bs;
1324   aa   = a->a;
1325   ai   = a->i;
1326   aj   = a->j;
1327   ambs = a->mbs;
1328   bs2  = a->bs2;
1329 
1330   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1331   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1332   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1333   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1334   for (i=0; i<ambs; i++) {
1335     for (j=ai[i]; j<ai[i+1]; j++) {
1336       if (aj[j] == i) {
1337         row  = i*bs;
1338         aa_j = aa+j*bs2;
1339         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1340         break;
1341       }
1342     }
1343   }
1344   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1350 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1351 {
1352   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1353   PetscScalar  *l,*r,x,*li,*ri;
1354   MatScalar    *aa,*v;
1355   PetscErrorCode ierr;
1356   int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1357 
1358   PetscFunctionBegin;
1359   ai  = a->i;
1360   aj  = a->j;
1361   aa  = a->a;
1362   m   = A->m;
1363   n   = A->n;
1364   bs  = a->bs;
1365   mbs = a->mbs;
1366   bs2 = a->bs2;
1367   if (ll) {
1368     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1369     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1370     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1371     for (i=0; i<mbs; i++) { /* for each block row */
1372       M  = ai[i+1] - ai[i];
1373       li = l + i*bs;
1374       v  = aa + bs2*ai[i];
1375       for (j=0; j<M; j++) { /* for each block */
1376         for (k=0; k<bs2; k++) {
1377           (*v++) *= li[k%bs];
1378         }
1379       }
1380     }
1381     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1382     PetscLogFlops(a->nz);
1383   }
1384 
1385   if (rr) {
1386     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1387     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1388     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1389     for (i=0; i<mbs; i++) { /* for each block row */
1390       M  = ai[i+1] - ai[i];
1391       v  = aa + bs2*ai[i];
1392       for (j=0; j<M; j++) { /* for each block */
1393         ri = r + bs*aj[ai[i]+j];
1394         for (k=0; k<bs; k++) {
1395           x = ri[k];
1396           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1397         }
1398       }
1399     }
1400     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1401     PetscLogFlops(a->nz);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1409 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1410 {
1411   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1412 
1413   PetscFunctionBegin;
1414   info->rows_global    = (double)A->m;
1415   info->columns_global = (double)A->n;
1416   info->rows_local     = (double)A->m;
1417   info->columns_local  = (double)A->n;
1418   info->block_size     = a->bs2;
1419   info->nz_allocated   = a->maxnz;
1420   info->nz_used        = a->bs2*a->nz;
1421   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1422   info->assemblies   = A->num_ass;
1423   info->mallocs      = a->reallocs;
1424   info->memory       = A->mem;
1425   if (A->factor) {
1426     info->fill_ratio_given  = A->info.fill_ratio_given;
1427     info->fill_ratio_needed = A->info.fill_ratio_needed;
1428     info->factor_mallocs    = A->info.factor_mallocs;
1429   } else {
1430     info->fill_ratio_given  = 0;
1431     info->fill_ratio_needed = 0;
1432     info->factor_mallocs    = 0;
1433   }
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1440 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1441 {
1442   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1443   PetscErrorCode ierr;
1444 
1445   PetscFunctionBegin;
1446   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449