xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision ee54c7eeedc0e12ac48b27e5e83e6f2d15f2ca1c)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/seq/baij.h"
4 #include "../src/mat/blockinvert.h"
5 #include "petscbt.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
10 {
11   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
12   PetscErrorCode ierr;
13   PetscInt       row,i,j,k,l,m,n,*nidx,isz,val,ival;
14   const PetscInt *idx;
15   PetscInt       start,end,*ai,*aj,bs,*nidx2;
16   PetscBT        table;
17 
18   PetscFunctionBegin;
19   m     = a->mbs;
20   ai    = a->i;
21   aj    = a->j;
22   bs    = A->rmap->bs;
23 
24   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25 
26   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
27   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
28   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
29 
30   for (i=0; i<is_max; i++) {
31     /* Initialise the two local arrays */
32     isz  = 0;
33     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
34 
35     /* Extract the indices, assume there can be duplicate entries */
36     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
37     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
38 
39     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40     for (j=0; j<n ; ++j){
41       ival = idx[j]/bs; /* convert the indices into block indices */
42       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44     }
45     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
46     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
47 
48     k = 0;
49     for (j=0; j<ov; j++){ /* for each overlap*/
50       n = isz;
51       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
52         row   = nidx[k];
53         start = ai[row];
54         end   = ai[row+1];
55         for (l = start; l<end ; l++){
56           val = aj[l];
57           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58         }
59       }
60     }
61     /* expand the Index Set */
62     for (j=0; j<isz; j++) {
63       for (k=0; k<bs; k++)
64         nidx2[j*bs+k] = nidx[j]*bs+k;
65     }
66     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
67   }
68   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
69   ierr = PetscFree(nidx);CHKERRQ(ierr);
70   ierr = PetscFree(nidx2);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
77 {
78   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*c;
79   PetscErrorCode ierr;
80   PetscInt       *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
82   const PetscInt *irow,*icol;
83   PetscInt       nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
84   PetscInt       *aj = a->j,*ai = a->i;
85   MatScalar      *mat_a;
86   Mat            C;
87   PetscTruth     flag,sorted;
88 
89   PetscFunctionBegin;
90   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
91   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
92 
93   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
94   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
95   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
96   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
97 
98   ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
99   ssmap = smap;
100   ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
101   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
103   /* determine lens of each row */
104   for (i=0; i<nrows; i++) {
105     kstart  = ai[irow[i]];
106     kend    = kstart + a->ilen[irow[i]];
107     lens[i] = 0;
108       for (k=kstart; k<kend; k++) {
109         if (ssmap[aj[k]]) {
110           lens[i]++;
111         }
112       }
113     }
114   /* Create and fill new matrix */
115   if (scall == MAT_REUSE_MATRIX) {
116     c = (Mat_SeqBAIJ *)((*B)->data);
117 
118     if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
119     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
120     if (!flag) {
121       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
122     }
123     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
124     C = *B;
125   } else {
126     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
127     ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
128     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
129     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr);
130   }
131   c = (Mat_SeqBAIJ *)(C->data);
132   for (i=0; i<nrows; i++) {
133     row    = irow[i];
134     kstart = ai[row];
135     kend   = kstart + a->ilen[row];
136     mat_i  = c->i[i];
137     mat_j  = c->j + mat_i;
138     mat_a  = c->a + mat_i*bs2;
139     mat_ilen = c->ilen + i;
140     for (k=kstart; k<kend; k++) {
141       if ((tcol=ssmap[a->j[k]])) {
142         *mat_j++ = tcol - 1;
143         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
144         mat_a   += bs2;
145         (*mat_ilen)++;
146       }
147     }
148   }
149 
150   /* Free work space */
151   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
152   ierr = PetscFree(smap);CHKERRQ(ierr);
153   ierr = PetscFree(lens);CHKERRQ(ierr);
154   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
155   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
156 
157   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
158   *B = C;
159   PetscFunctionReturn(0);
160 }
161 
162 #undef __FUNCT__
163 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
164 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
165 {
166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
167   IS             is1,is2;
168   PetscErrorCode ierr;
169   PetscInt       *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count;
170   const PetscInt *irow,*icol;
171 
172   PetscFunctionBegin;
173   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
174   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
175   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
176   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
177 
178   /* Verify if the indices corespond to each element in a block
179    and form the IS with compressed IS */
180   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
181   iary = vary + a->mbs;
182   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
183   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
184   count = 0;
185   for (i=0; i<a->mbs; i++) {
186     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
187     if (vary[i]==bs) iary[count++] = i;
188   }
189   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
190 
191   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
192   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
193   count = 0;
194   for (i=0; i<a->mbs; i++) {
195     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
196     if (vary[i]==bs) iary[count++] = i;
197   }
198   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
199   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
200   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
201   ierr = PetscFree(vary);CHKERRQ(ierr);
202 
203   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr);
204   ISDestroy(is1);
205   ISDestroy(is2);
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNCT__
210 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
211 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
212 {
213   PetscErrorCode ierr;
214   PetscInt       i;
215 
216   PetscFunctionBegin;
217   if (scall == MAT_INITIAL_MATRIX) {
218     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
219   }
220 
221   for (i=0; i<n; i++) {
222     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
223   }
224   PetscFunctionReturn(0);
225 }
226 
227 
228 /* -------------------------------------------------------*/
229 /* Should check that shapes of vectors and matrices match */
230 /* -------------------------------------------------------*/
231 #include "petscblaslapack.h"
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatMult_SeqBAIJ_1"
235 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
236 {
237   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar       *z,sum;
239   const PetscScalar *x;
240   const MatScalar   *v;
241   PetscErrorCode    ierr;
242   PetscInt          mbs,i,n,nonzerorow=0;
243   const PetscInt    *idx,*ii,*ridx=PETSC_NULL;
244   PetscTruth        usecprow=a->compressedrow.use;
245 
246   PetscFunctionBegin;
247   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
248   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
249 
250   if (usecprow){
251     mbs  = a->compressedrow.nrows;
252     ii   = a->compressedrow.i;
253     ridx = a->compressedrow.rindex;
254   } else {
255     mbs = a->mbs;
256     ii  = a->i;
257   }
258 
259   for (i=0; i<mbs; i++) {
260     n    = ii[1] - ii[0];
261     v    = a->a + ii[0];
262     idx  = a->j + ii[0];
263     ii++;
264     sum  = 0.0;
265     nonzerorow += (n>0);
266     PetscSparseDensePlusDot(sum,x,v,idx,n);
267     if (usecprow){
268       z[ridx[i]] = sum;
269     } else {
270       z[i] = sum;
271     }
272   }
273   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
274   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
275   ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatMult_SeqBAIJ_2"
281 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
282 {
283   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
284   PetscScalar       *z = 0,sum1,sum2,*zarray;
285   const PetscScalar *x,*xb;
286   PetscScalar       x1,x2;
287   const MatScalar   *v;
288   PetscErrorCode    ierr;
289   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
290   PetscTruth        usecprow=a->compressedrow.use;
291 
292   PetscFunctionBegin;
293   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
294   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
295 
296   idx = a->j;
297   v   = a->a;
298   if (usecprow){
299     mbs  = a->compressedrow.nrows;
300     ii   = a->compressedrow.i;
301     ridx = a->compressedrow.rindex;
302   } else {
303     mbs = a->mbs;
304     ii  = a->i;
305     z   = zarray;
306   }
307 
308   for (i=0; i<mbs; i++) {
309     n  = ii[1] - ii[0]; ii++;
310     sum1 = 0.0; sum2 = 0.0;
311     nonzerorow += (n>0);
312     for (j=0; j<n; j++) {
313       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
314       sum1 += v[0]*x1 + v[2]*x2;
315       sum2 += v[1]*x1 + v[3]*x2;
316       v += 4;
317     }
318     if (usecprow) z = zarray + 2*ridx[i];
319     z[0] = sum1; z[1] = sum2;
320     if (!usecprow) z += 2;
321   }
322   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
324   ierr = PetscLogFlops(8.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatMult_SeqBAIJ_3"
330 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
331 {
332   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar       *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray;
334   const PetscScalar *x,*xb;
335   const MatScalar   *v;
336   PetscErrorCode    ierr;
337   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
338   PetscTruth        usecprow=a->compressedrow.use;
339 
340 
341 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
342 #pragma disjoint(*v,*z,*xb)
343 #endif
344 
345   PetscFunctionBegin;
346   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
347   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
348 
349   idx = a->j;
350   v   = a->a;
351   if (usecprow){
352     mbs  = a->compressedrow.nrows;
353     ii   = a->compressedrow.i;
354     ridx = a->compressedrow.rindex;
355   } else {
356     mbs = a->mbs;
357     ii  = a->i;
358     z   = zarray;
359   }
360 
361   for (i=0; i<mbs; i++) {
362     n  = ii[1] - ii[0]; ii++;
363     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
364     nonzerorow += (n>0);
365     for (j=0; j<n; j++) {
366       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
367       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
368       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
369       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
370       v += 9;
371     }
372     if (usecprow) z = zarray + 3*ridx[i];
373     z[0] = sum1; z[1] = sum2; z[2] = sum3;
374     if (!usecprow) z += 3;
375   }
376   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
377   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
378   ierr = PetscLogFlops(18.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "MatMult_SeqBAIJ_4"
384 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
385 {
386   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
387   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
388   const PetscScalar *x,*xb;
389   const MatScalar   *v;
390   PetscErrorCode    ierr;
391   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
392   PetscTruth        usecprow=a->compressedrow.use;
393 
394   PetscFunctionBegin;
395   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
396   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
397 
398   idx = a->j;
399   v   = a->a;
400   if (usecprow){
401     mbs  = a->compressedrow.nrows;
402     ii   = a->compressedrow.i;
403     ridx = a->compressedrow.rindex;
404   } else {
405     mbs = a->mbs;
406     ii  = a->i;
407     z   = zarray;
408   }
409 
410   for (i=0; i<mbs; i++) {
411     n  = ii[1] - ii[0]; ii++;
412     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
413     nonzerorow += (n>0);
414     for (j=0; j<n; j++) {
415       xb = x + 4*(*idx++);
416       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
417       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
418       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
419       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
420       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
421       v += 16;
422     }
423     if (usecprow) z = zarray + 4*ridx[i];
424     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
425     if (!usecprow) z += 4;
426   }
427   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
428   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
429   ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatMult_SeqBAIJ_5"
435 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
436 {
437   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar       sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray;
439   const PetscScalar *xb,*x;
440   const MatScalar   *v;
441   PetscErrorCode    ierr;
442   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
443   PetscTruth        usecprow=a->compressedrow.use;
444 
445   PetscFunctionBegin;
446   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
447   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
448 
449   idx = a->j;
450   v   = a->a;
451   if (usecprow){
452     mbs  = a->compressedrow.nrows;
453     ii   = a->compressedrow.i;
454     ridx = a->compressedrow.rindex;
455   } else {
456     mbs = a->mbs;
457     ii  = a->i;
458     z   = zarray;
459   }
460 
461   for (i=0; i<mbs; i++) {
462     n  = ii[1] - ii[0]; ii++;
463     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
464     nonzerorow += (n>0);
465     for (j=0; j<n; j++) {
466       xb = x + 5*(*idx++);
467       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
468       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
469       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
470       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
471       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
472       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
473       v += 25;
474     }
475     if (usecprow) z = zarray + 5*ridx[i];
476     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
477     if (!usecprow) z += 5;
478   }
479   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
480   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
481   ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "MatMult_SeqBAIJ_6"
488 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
489 {
490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
491   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
492   const PetscScalar *x,*xb;
493   PetscScalar       x1,x2,x3,x4,x5,x6,*zarray;
494   const MatScalar   *v;
495   PetscErrorCode    ierr;
496   PetscInt          mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
497   PetscTruth        usecprow=a->compressedrow.use;
498 
499   PetscFunctionBegin;
500   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
501   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
502 
503   idx = a->j;
504   v   = a->a;
505   if (usecprow){
506     mbs  = a->compressedrow.nrows;
507     ii   = a->compressedrow.i;
508     ridx = a->compressedrow.rindex;
509   } else {
510     mbs = a->mbs;
511     ii  = a->i;
512     z   = zarray;
513   }
514 
515   for (i=0; i<mbs; i++) {
516     n  = ii[1] - ii[0]; ii++;
517     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
518     nonzerorow += (n>0);
519     for (j=0; j<n; j++) {
520       xb = x + 6*(*idx++);
521       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
522       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
523       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
524       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
525       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
526       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
527       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
528       v += 36;
529     }
530     if (usecprow) z = zarray + 6*ridx[i];
531     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
532     if (!usecprow) z += 6;
533   }
534 
535   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
536   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
537   ierr = PetscLogFlops(72.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_SeqBAIJ_7"
542 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
543 {
544   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
545   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
546   const PetscScalar *x,*xb;
547   PetscScalar       x1,x2,x3,x4,x5,x6,x7,*zarray;
548   const MatScalar   *v;
549   PetscErrorCode    ierr;
550   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
551   PetscTruth        usecprow=a->compressedrow.use;
552 
553   PetscFunctionBegin;
554   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
555   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
556 
557   idx = a->j;
558   v   = a->a;
559   if (usecprow){
560     mbs    = a->compressedrow.nrows;
561     ii     = a->compressedrow.i;
562     ridx = a->compressedrow.rindex;
563   } else {
564     mbs = a->mbs;
565     ii  = a->i;
566     z   = zarray;
567   }
568 
569   for (i=0; i<mbs; i++) {
570     n  = ii[1] - ii[0]; ii++;
571     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
572     nonzerorow += (n>0);
573     for (j=0; j<n; j++) {
574       xb = x + 7*(*idx++);
575       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
576       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
577       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
578       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
579       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
580       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
581       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
582       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
583       v += 49;
584     }
585     if (usecprow) z = zarray + 7*ridx[i];
586     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
587     if (!usecprow) z += 7;
588   }
589 
590   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
591   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
592   ierr = PetscLogFlops(98.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 
596 /*
597     This will not work with MatScalar == float because it calls the BLAS
598 */
599 #undef __FUNCT__
600 #define __FUNCT__ "MatMult_SeqBAIJ_N"
601 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
602 {
603   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
604   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
605   MatScalar      *v;
606   PetscErrorCode ierr;
607   PetscInt       mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
608   PetscInt       ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
609   PetscTruth     usecprow=a->compressedrow.use;
610 
611   PetscFunctionBegin;
612   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
613   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
614 
615   idx = a->j;
616   v   = a->a;
617   if (usecprow){
618     mbs  = a->compressedrow.nrows;
619     ii   = a->compressedrow.i;
620     ridx = a->compressedrow.rindex;
621   } else {
622     mbs = a->mbs;
623     ii  = a->i;
624     z   = zarray;
625   }
626 
627   if (!a->mult_work) {
628     k    = PetscMax(A->rmap->n,A->cmap->n);
629     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
630   }
631   work = a->mult_work;
632   for (i=0; i<mbs; i++) {
633     n     = ii[1] - ii[0]; ii++;
634     ncols = n*bs;
635     workt = work;
636     nonzerorow += (n>0);
637     for (j=0; j<n; j++) {
638       xb = x + bs*(*idx++);
639       for (k=0; k<bs; k++) workt[k] = xb[k];
640       workt += bs;
641     }
642     if (usecprow) z = zarray + bs*ridx[i];
643     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
644     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
645     v += n*bs2;
646     if (!usecprow) z += bs;
647   }
648   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
649   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
650   ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 extern PetscErrorCode VecCopy_Seq(Vec,Vec);
655 #undef __FUNCT__
656 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
657 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
658 {
659   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
660   PetscScalar    *x,*y = 0,*z = 0,sum,*yarray,*zarray;
661   MatScalar      *v;
662   PetscErrorCode ierr;
663   PetscInt       mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL;
664   PetscTruth     usecprow=a->compressedrow.use;
665 
666   PetscFunctionBegin;
667   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
669   if (zz != yy) {
670     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
671   } else {
672     zarray = yarray;
673   }
674 
675   idx = a->j;
676   v   = a->a;
677   if (usecprow){
678     if (zz != yy){
679       ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
680     }
681     mbs  = a->compressedrow.nrows;
682     ii   = a->compressedrow.i;
683     ridx = a->compressedrow.rindex;
684   } else {
685     ii  = a->i;
686     y   = yarray;
687     z   = zarray;
688   }
689 
690   for (i=0; i<mbs; i++) {
691     n    = ii[1] - ii[0]; ii++;
692     if (usecprow){
693       z = zarray + ridx[i];
694       y = yarray + ridx[i];
695     }
696     sum = y[0];
697     while (n--) sum += *v++ * x[*idx++];
698     z[0] = sum;
699     if (!usecprow){
700       z++; y++;
701     }
702   }
703   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
704   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
705   if (zz != yy) {
706     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
707   }
708   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
714 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
715 {
716   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
717   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2;
718   PetscScalar    x1,x2,*yarray,*zarray;
719   MatScalar      *v;
720   PetscErrorCode ierr;
721   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
722   PetscTruth     usecprow=a->compressedrow.use;
723 
724   PetscFunctionBegin;
725   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
726   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
727   if (zz != yy) {
728     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
729   } else {
730     zarray = yarray;
731   }
732 
733   idx = a->j;
734   v   = a->a;
735   if (usecprow){
736     if (zz != yy){
737       ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
738     }
739     mbs  = a->compressedrow.nrows;
740     ii   = a->compressedrow.i;
741     ridx = a->compressedrow.rindex;
742     if (zz != yy){
743       ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr);
744     }
745   } else {
746     ii  = a->i;
747     y   = yarray;
748     z   = zarray;
749   }
750 
751   for (i=0; i<mbs; i++) {
752     n  = ii[1] - ii[0]; ii++;
753     if (usecprow){
754       z = zarray + 2*ridx[i];
755       y = yarray + 2*ridx[i];
756     }
757     sum1 = y[0]; sum2 = y[1];
758     for (j=0; j<n; j++) {
759       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
760       sum1 += v[0]*x1 + v[2]*x2;
761       sum2 += v[1]*x1 + v[3]*x2;
762       v += 4;
763     }
764     z[0] = sum1; z[1] = sum2;
765     if (!usecprow){
766       z += 2; y += 2;
767     }
768   }
769   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
770   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
771   if (zz != yy) {
772     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
773   }
774   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
780 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
784   MatScalar      *v;
785   PetscErrorCode ierr;
786   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
787   PetscTruth     usecprow=a->compressedrow.use;
788 
789   PetscFunctionBegin;
790   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
791   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
792   if (zz != yy) {
793     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
794   } else {
795     zarray = yarray;
796   }
797 
798   idx = a->j;
799   v   = a->a;
800   if (usecprow){
801     if (zz != yy){
802       ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
803     }
804     mbs  = a->compressedrow.nrows;
805     ii   = a->compressedrow.i;
806     ridx = a->compressedrow.rindex;
807   } else {
808     ii  = a->i;
809     y   = yarray;
810     z   = zarray;
811   }
812 
813   for (i=0; i<mbs; i++) {
814     n  = ii[1] - ii[0]; ii++;
815     if (usecprow){
816       z = zarray + 3*ridx[i];
817       y = yarray + 3*ridx[i];
818     }
819     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
820     for (j=0; j<n; j++) {
821       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
822       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
823       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
824       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
825       v += 9;
826     }
827     z[0] = sum1; z[1] = sum2; z[2] = sum3;
828     if (!usecprow){
829       z += 3; y += 3;
830     }
831   }
832   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
833   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
834   if (zz != yy) {
835     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
836   }
837   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
843 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
844 {
845   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
846   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
847   MatScalar      *v;
848   PetscErrorCode ierr;
849   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
850   PetscTruth     usecprow=a->compressedrow.use;
851 
852   PetscFunctionBegin;
853   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
854   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
855   if (zz != yy) {
856     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
857   } else {
858     zarray = yarray;
859   }
860 
861   idx   = a->j;
862   v     = a->a;
863   if (usecprow){
864     if (zz != yy){
865       ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
866     }
867     mbs  = a->compressedrow.nrows;
868     ii   = a->compressedrow.i;
869     ridx = a->compressedrow.rindex;
870   } else {
871     ii  = a->i;
872     y   = yarray;
873     z   = zarray;
874   }
875 
876   for (i=0; i<mbs; i++) {
877     n  = ii[1] - ii[0]; ii++;
878     if (usecprow){
879       z = zarray + 4*ridx[i];
880       y = yarray + 4*ridx[i];
881     }
882     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
883     for (j=0; j<n; j++) {
884       xb = x + 4*(*idx++);
885       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
886       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
887       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
888       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
889       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
890       v += 16;
891     }
892     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
893     if (!usecprow){
894       z += 4; y += 4;
895     }
896   }
897   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
898   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
899   if (zz != yy) {
900     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
901   }
902   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 #undef __FUNCT__
907 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
908 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
909 {
910   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
911   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
912   PetscScalar    *yarray,*zarray;
913   MatScalar      *v;
914   PetscErrorCode ierr;
915   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
916   PetscTruth     usecprow=a->compressedrow.use;
917 
918   PetscFunctionBegin;
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
921   if (zz != yy) {
922     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
923   } else {
924     zarray = yarray;
925   }
926 
927   idx = a->j;
928   v   = a->a;
929   if (usecprow){
930     if (zz != yy){
931       ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
932     }
933     mbs  = a->compressedrow.nrows;
934     ii   = a->compressedrow.i;
935     ridx = a->compressedrow.rindex;
936   } else {
937     ii  = a->i;
938     y   = yarray;
939     z   = zarray;
940   }
941 
942   for (i=0; i<mbs; i++) {
943     n  = ii[1] - ii[0]; ii++;
944     if (usecprow){
945       z = zarray + 5*ridx[i];
946       y = yarray + 5*ridx[i];
947     }
948     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
949     for (j=0; j<n; j++) {
950       xb = x + 5*(*idx++);
951       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
952       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
953       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
954       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
955       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
956       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
957       v += 25;
958     }
959     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
960     if (!usecprow){
961       z += 5; y += 5;
962     }
963   }
964   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
966   if (zz != yy) {
967     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
968   }
969   ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 #undef __FUNCT__
973 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
974 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
975 {
976   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
977   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
978   PetscScalar    x1,x2,x3,x4,x5,x6,*yarray,*zarray;
979   MatScalar      *v;
980   PetscErrorCode ierr;
981   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
982   PetscTruth     usecprow=a->compressedrow.use;
983 
984   PetscFunctionBegin;
985   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
986   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
987   if (zz != yy) {
988     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
989   } else {
990     zarray = yarray;
991   }
992 
993   idx = a->j;
994   v   = a->a;
995   if (usecprow){
996     if (zz != yy){
997       ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
998     }
999     mbs  = a->compressedrow.nrows;
1000     ii   = a->compressedrow.i;
1001     ridx = a->compressedrow.rindex;
1002   } else {
1003     ii  = a->i;
1004     y   = yarray;
1005     z   = zarray;
1006   }
1007 
1008   for (i=0; i<mbs; i++) {
1009     n  = ii[1] - ii[0]; ii++;
1010     if (usecprow){
1011       z = zarray + 6*ridx[i];
1012       y = yarray + 6*ridx[i];
1013     }
1014     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1015     for (j=0; j<n; j++) {
1016       xb = x + 6*(*idx++);
1017       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1018       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
1019       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
1020       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
1021       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
1022       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
1023       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
1024       v += 36;
1025     }
1026     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1027     if (!usecprow){
1028       z += 6; y += 6;
1029     }
1030   }
1031   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1032   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1033   if (zz != yy) {
1034     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1035   }
1036   ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 #undef __FUNCT__
1041 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
1042 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1043 {
1044   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1045   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1046   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1047   MatScalar      *v;
1048   PetscErrorCode ierr;
1049   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1050   PetscTruth     usecprow=a->compressedrow.use;
1051 
1052   PetscFunctionBegin;
1053   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1054   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1055   if (zz != yy) {
1056     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1057   } else {
1058     zarray = yarray;
1059   }
1060 
1061   idx = a->j;
1062   v   = a->a;
1063   if (usecprow){
1064     if (zz != yy){
1065       ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1066     }
1067     mbs  = a->compressedrow.nrows;
1068     ii   = a->compressedrow.i;
1069     ridx = a->compressedrow.rindex;
1070   } else {
1071     ii  = a->i;
1072     y   = yarray;
1073     z   = zarray;
1074   }
1075 
1076   for (i=0; i<mbs; i++) {
1077     n  = ii[1] - ii[0]; ii++;
1078     if (usecprow){
1079       z = zarray + 7*ridx[i];
1080       y = yarray + 7*ridx[i];
1081     }
1082     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1083     for (j=0; j<n; j++) {
1084       xb = x + 7*(*idx++);
1085       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1086       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1087       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1088       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1089       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1090       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1091       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1092       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1093       v += 49;
1094     }
1095     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1096     if (!usecprow){
1097       z += 7; y += 7;
1098     }
1099   }
1100   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1101   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1102   if (zz != yy) {
1103     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1104   }
1105   ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
1111 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1112 {
1113   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1114   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
1115   MatScalar      *v;
1116   PetscErrorCode ierr;
1117   PetscInt       mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
1118   PetscInt       ncols,k,*ridx=PETSC_NULL;
1119   PetscTruth     usecprow=a->compressedrow.use;
1120 
1121   PetscFunctionBegin;
1122   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
1123   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1124   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1125 
1126   idx = a->j;
1127   v   = a->a;
1128   if (usecprow){
1129     mbs    = a->compressedrow.nrows;
1130     ii     = a->compressedrow.i;
1131     ridx = a->compressedrow.rindex;
1132   } else {
1133     mbs = a->mbs;
1134     ii  = a->i;
1135     z   = zarray;
1136   }
1137 
1138   if (!a->mult_work) {
1139     k    = PetscMax(A->rmap->n,A->cmap->n);
1140     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1141   }
1142   work = a->mult_work;
1143   for (i=0; i<mbs; i++) {
1144     n     = ii[1] - ii[0]; ii++;
1145     ncols = n*bs;
1146     workt = work;
1147     for (j=0; j<n; j++) {
1148       xb = x + bs*(*idx++);
1149       for (k=0; k<bs; k++) workt[k] = xb[k];
1150       workt += bs;
1151     }
1152     if (usecprow) z = zarray + bs*ridx[i];
1153     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1154     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1155     v += n*bs2;
1156     if (!usecprow){
1157       z += bs;
1158     }
1159   }
1160   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1161   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1162   ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
1168 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1169 {
1170   PetscScalar    zero = 0.0;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1175   ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1181 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1182 
1183 {
1184   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1185   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1186   MatScalar         *v;
1187   PetscErrorCode    ierr;
1188   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1189   Mat_CompressedRow cprow = a->compressedrow;
1190   PetscTruth        usecprow=cprow.use;
1191 
1192   PetscFunctionBegin;
1193   if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); }
1194   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1195   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1196 
1197   idx = a->j;
1198   v   = a->a;
1199   if (usecprow){
1200     mbs  = cprow.nrows;
1201     ii   = cprow.i;
1202     ridx = cprow.rindex;
1203   } else {
1204     mbs=a->mbs;
1205     ii = a->i;
1206     xb = x;
1207   }
1208 
1209   switch (bs) {
1210   case 1:
1211     for (i=0; i<mbs; i++) {
1212       if (usecprow) xb = x + ridx[i];
1213       x1 = xb[0];
1214       ib = idx + ii[0];
1215       n  = ii[1] - ii[0]; ii++;
1216       for (j=0; j<n; j++) {
1217         rval    = ib[j];
1218         z[rval] += *v * x1;
1219         v++;
1220       }
1221       if (!usecprow) xb++;
1222     }
1223     break;
1224   case 2:
1225     for (i=0; i<mbs; i++) {
1226       if (usecprow) xb = x + 2*ridx[i];
1227       x1 = xb[0]; x2 = xb[1];
1228       ib = idx + ii[0];
1229       n  = ii[1] - ii[0]; ii++;
1230       for (j=0; j<n; j++) {
1231         rval      = ib[j]*2;
1232         z[rval++] += v[0]*x1 + v[1]*x2;
1233         z[rval++] += v[2]*x1 + v[3]*x2;
1234         v  += 4;
1235       }
1236       if (!usecprow) xb += 2;
1237     }
1238     break;
1239   case 3:
1240     for (i=0; i<mbs; i++) {
1241       if (usecprow) xb = x + 3*ridx[i];
1242       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1243       ib = idx + ii[0];
1244       n  = ii[1] - ii[0]; ii++;
1245       for (j=0; j<n; j++) {
1246         rval      = ib[j]*3;
1247         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1248         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1249         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1250         v  += 9;
1251       }
1252       if (!usecprow) xb += 3;
1253     }
1254     break;
1255   case 4:
1256     for (i=0; i<mbs; i++) {
1257       if (usecprow) xb = x + 4*ridx[i];
1258       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1259       ib = idx + ii[0];
1260       n  = ii[1] - ii[0]; ii++;
1261       for (j=0; j<n; j++) {
1262         rval      = ib[j]*4;
1263         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1264         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1265         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1266         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1267         v  += 16;
1268       }
1269       if (!usecprow) xb += 4;
1270     }
1271     break;
1272   case 5:
1273     for (i=0; i<mbs; i++) {
1274       if (usecprow) xb = x + 5*ridx[i];
1275       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1276       x4 = xb[3]; x5 = xb[4];
1277       ib = idx + ii[0];
1278       n  = ii[1] - ii[0]; ii++;
1279       for (j=0; j<n; j++) {
1280         rval      = ib[j]*5;
1281         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1282         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1283         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1284         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1285         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1286         v  += 25;
1287       }
1288       if (!usecprow) xb += 5;
1289     }
1290     break;
1291   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1292       PetscInt     ncols,k;
1293       PetscScalar  *work,*workt,*xtmp;
1294 
1295       if (!a->mult_work) {
1296         k = PetscMax(A->rmap->n,A->cmap->n);
1297         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1298       }
1299       work = a->mult_work;
1300       xtmp = x;
1301       for (i=0; i<mbs; i++) {
1302         n     = ii[1] - ii[0]; ii++;
1303         ncols = n*bs;
1304         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1305         if (usecprow) {
1306           xtmp = x + bs*ridx[i];
1307         }
1308         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1309         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1310         v += n*bs2;
1311         if (!usecprow) xtmp += bs;
1312         workt = work;
1313         for (j=0; j<n; j++) {
1314           zb = z + bs*(*idx++);
1315           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1316           workt += bs;
1317         }
1318       }
1319     }
1320   }
1321   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1322   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1323   ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatScale_SeqBAIJ"
1329 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1330 {
1331   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1332   PetscInt       totalnz = a->bs2*a->nz;
1333   PetscScalar    oalpha = alpha;
1334   PetscErrorCode ierr;
1335   PetscBLASInt   one = 1,tnz = PetscBLASIntCast(totalnz);
1336 
1337   PetscFunctionBegin;
1338   BLASscal_(&tnz,&oalpha,a->a,&one);
1339   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatNorm_SeqBAIJ"
1345 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1346 {
1347   PetscErrorCode ierr;
1348   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1349   MatScalar      *v = a->a;
1350   PetscReal      sum = 0.0;
1351   PetscInt       i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1;
1352 
1353   PetscFunctionBegin;
1354   if (type == NORM_FROBENIUS) {
1355     for (i=0; i< bs2*nz; i++) {
1356 #if defined(PETSC_USE_COMPLEX)
1357       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1358 #else
1359       sum += (*v)*(*v); v++;
1360 #endif
1361     }
1362     *norm = sqrt(sum);
1363   } else if (type == NORM_1) { /* maximum column sum */
1364     PetscReal *tmp;
1365     PetscInt  *bcol = a->j;
1366     ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1367     ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr);
1368     for (i=0; i<nz; i++){
1369       for (j=0; j<bs; j++){
1370         k1 = bs*(*bcol) + j; /* column index */
1371         for (k=0; k<bs; k++){
1372           tmp[k1] += PetscAbsScalar(*v); v++;
1373         }
1374       }
1375       bcol++;
1376     }
1377     *norm = 0.0;
1378     for (j=0; j<A->cmap->n; j++) {
1379       if (tmp[j] > *norm) *norm = tmp[j];
1380     }
1381     ierr = PetscFree(tmp);CHKERRQ(ierr);
1382   } else if (type == NORM_INFINITY) { /* maximum row sum */
1383     *norm = 0.0;
1384     for (k=0; k<bs; k++) {
1385       for (j=0; j<a->mbs; j++) {
1386         v = a->a + bs2*a->i[j] + k;
1387         sum = 0.0;
1388         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1389           for (k1=0; k1<bs; k1++){
1390             sum += PetscAbsScalar(*v);
1391             v   += bs;
1392           }
1393         }
1394         if (sum > *norm) *norm = sum;
1395       }
1396     }
1397   } else {
1398     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatEqual_SeqBAIJ"
1406 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1407 {
1408   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1413   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1414     *flg = PETSC_FALSE;
1415     PetscFunctionReturn(0);
1416   }
1417 
1418   /* if the a->i are the same */
1419   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1420   if (!*flg) {
1421     PetscFunctionReturn(0);
1422   }
1423 
1424   /* if a->j are the same */
1425   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1426   if (!*flg) {
1427     PetscFunctionReturn(0);
1428   }
1429   /* if a->a are the same */
1430   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1437 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1442   PetscScalar    *x,zero = 0.0;
1443   MatScalar      *aa,*aa_j;
1444 
1445   PetscFunctionBegin;
1446   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1447   bs   = A->rmap->bs;
1448   aa   = a->a;
1449   ai   = a->i;
1450   aj   = a->j;
1451   ambs = a->mbs;
1452   bs2  = a->bs2;
1453 
1454   ierr = VecSet(v,zero);CHKERRQ(ierr);
1455   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1456   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1457   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1458   for (i=0; i<ambs; i++) {
1459     for (j=ai[i]; j<ai[i+1]; j++) {
1460       if (aj[j] == i) {
1461         row  = i*bs;
1462         aa_j = aa+j*bs2;
1463         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1464         break;
1465       }
1466     }
1467   }
1468   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1474 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1475 {
1476   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1477   PetscScalar    *l,*r,x,*li,*ri;
1478   MatScalar      *aa,*v;
1479   PetscErrorCode ierr;
1480   PetscInt       i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1481 
1482   PetscFunctionBegin;
1483   ai  = a->i;
1484   aj  = a->j;
1485   aa  = a->a;
1486   m   = A->rmap->n;
1487   n   = A->cmap->n;
1488   bs  = A->rmap->bs;
1489   mbs = a->mbs;
1490   bs2 = a->bs2;
1491   if (ll) {
1492     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1493     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1494     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1495     for (i=0; i<mbs; i++) { /* for each block row */
1496       M  = ai[i+1] - ai[i];
1497       li = l + i*bs;
1498       v  = aa + bs2*ai[i];
1499       for (j=0; j<M; j++) { /* for each block */
1500         for (k=0; k<bs2; k++) {
1501           (*v++) *= li[k%bs];
1502         }
1503       }
1504     }
1505     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1506     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1507   }
1508 
1509   if (rr) {
1510     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1511     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1512     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1513     for (i=0; i<mbs; i++) { /* for each block row */
1514       M  = ai[i+1] - ai[i];
1515       v  = aa + bs2*ai[i];
1516       for (j=0; j<M; j++) { /* for each block */
1517         ri = r + bs*aj[ai[i]+j];
1518         for (k=0; k<bs; k++) {
1519           x = ri[k];
1520           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1521         }
1522       }
1523     }
1524     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1525     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1533 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1534 {
1535   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1536 
1537   PetscFunctionBegin;
1538   info->block_size     = a->bs2;
1539   info->nz_allocated   = a->maxnz;
1540   info->nz_used        = a->bs2*a->nz;
1541   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1542   info->assemblies   = A->num_ass;
1543   info->mallocs      = a->reallocs;
1544   info->memory       = ((PetscObject)A)->mem;
1545   if (A->factor) {
1546     info->fill_ratio_given  = A->info.fill_ratio_given;
1547     info->fill_ratio_needed = A->info.fill_ratio_needed;
1548     info->factor_mallocs    = A->info.factor_mallocs;
1549   } else {
1550     info->fill_ratio_given  = 0;
1551     info->fill_ratio_needed = 0;
1552     info->factor_mallocs    = 0;
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1560 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1561 {
1562   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569