xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision ef66eb6987ddfdf4e414d6b820cbc8d8d7d17bc2)
1*ef66eb69SBarry Smith /*$Id: baij2.c,v 1.74 2001/08/07 03:02:55 balay Exp bsmith $*/
2cac129eeSSatish Balay 
3e090d566SSatish Balay #include "petscsys.h"
470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
52d61bbb3SSatish Balay #include "src/vec/vecimpl.h"
62d61bbb3SSatish Balay #include "src/inline/spops.h"
73f1db9ecSBarry Smith #include "src/inline/ilu.h"
80a835dfdSSatish Balay #include "petscbt.h"
9cac129eeSSatish Balay 
104a2ae208SSatish Balay #undef __FUNCT__
114a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
12736121d4SSatish Balay int MatIncreaseOverlap_SeqBAIJ(Mat A,int is_max,IS *is,int ov)
13a3192f15SSatish Balay {
14a3192f15SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
15218c64b6SSatish Balay   int         row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival;
16218c64b6SSatish Balay   int         start,end,*ai,*aj,bs,*nidx2;
17f1af5d2fSBarry Smith   PetscBT     table;
18a3192f15SSatish Balay 
193a40ed3dSBarry Smith   PetscFunctionBegin;
20a3192f15SSatish Balay   m     = a->mbs;
21a3192f15SSatish Balay   ai    = a->i;
22a3192f15SSatish Balay   aj    = a->j;
23218c64b6SSatish Balay   bs    = a->bs;
24a3192f15SSatish Balay 
2529bbc08cSBarry Smith   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
26a3192f15SSatish Balay 
276831982aSBarry Smith   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
28b0a32e0cSBarry Smith   ierr = PetscMalloc((m+1)*sizeof(int),&nidx);CHKERRQ(ierr);
29b0a32e0cSBarry Smith   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
30a3192f15SSatish Balay 
31a3192f15SSatish Balay   for (i=0; i<is_max; i++) {
32a3192f15SSatish Balay     /* Initialise the two local arrays */
33a3192f15SSatish Balay     isz  = 0;
346831982aSBarry Smith     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
35a3192f15SSatish Balay 
36a3192f15SSatish Balay     /* Extract the indices, assume there can be duplicate entries */
37a3192f15SSatish Balay     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
38b9b97703SBarry Smith     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
39a3192f15SSatish Balay 
40a3192f15SSatish Balay     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
41a3192f15SSatish Balay     for (j=0; j<n ; ++j){
42218c64b6SSatish Balay       ival = idx[j]/bs; /* convert the indices into block indices */
4329bbc08cSBarry Smith       if (ival>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
44f1af5d2fSBarry Smith       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
45a3192f15SSatish Balay     }
46a3192f15SSatish Balay     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
47a3192f15SSatish Balay     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
48a3192f15SSatish Balay 
49a3192f15SSatish Balay     k = 0;
50a3192f15SSatish Balay     for (j=0; j<ov; j++){ /* for each overlap*/
51a3192f15SSatish Balay       n = isz;
52a3192f15SSatish Balay       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
53a3192f15SSatish Balay         row   = nidx[k];
54a3192f15SSatish Balay         start = ai[row];
55a3192f15SSatish Balay         end   = ai[row+1];
56a3192f15SSatish Balay         for (l = start; l<end ; l++){
57a3192f15SSatish Balay           val = aj[l];
58f1af5d2fSBarry Smith           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
59a3192f15SSatish Balay         }
60a3192f15SSatish Balay       }
61a3192f15SSatish Balay     }
62218c64b6SSatish Balay     /* expand the Index Set */
63218c64b6SSatish Balay     for (j=0; j<isz; j++) {
64218c64b6SSatish Balay       for (k=0; k<bs; k++)
65218c64b6SSatish Balay         nidx2[j*bs+k] = nidx[j]*bs+k;
66218c64b6SSatish Balay     }
67329f5518SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
68a3192f15SSatish Balay   }
696831982aSBarry Smith   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
70606d414cSSatish Balay   ierr = PetscFree(nidx);CHKERRQ(ierr);
71606d414cSSatish Balay   ierr = PetscFree(nidx2);CHKERRQ(ierr);
723a40ed3dSBarry Smith   PetscFunctionReturn(0);
73a3192f15SSatish Balay }
741c351548SSatish Balay 
754a2ae208SSatish Balay #undef __FUNCT__
764a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
777b2a1423SBarry Smith int MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
78736121d4SSatish Balay {
79736121d4SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data,*c;
8040011551SBarry Smith   int          *smap,i,k,kstart,kend,ierr,oldcols = a->nbs,*lens;
81218c64b6SSatish Balay   int          row,mat_i,*mat_j,tcol,*mat_ilen;
82736121d4SSatish Balay   int          *irow,*icol,nrows,ncols,*ssmap,bs=a->bs,bs2=a->bs2;
83218c64b6SSatish Balay   int          *aj = a->j,*ai = a->i;
843f1db9ecSBarry Smith   MatScalar    *mat_a;
85736121d4SSatish Balay   Mat          C;
860f5bd95cSBarry Smith   PetscTruth   flag;
87736121d4SSatish Balay 
883a40ed3dSBarry Smith   PetscFunctionBegin;
89888f2ed8SSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
9029bbc08cSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
91736121d4SSatish Balay 
92736121d4SSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
93218c64b6SSatish Balay   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
94b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
95b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
96736121d4SSatish Balay 
97b0a32e0cSBarry Smith   ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
98736121d4SSatish Balay   ssmap = smap;
99b0a32e0cSBarry Smith   ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
100549d3d68SSatish Balay   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
101736121d4SSatish Balay   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
102736121d4SSatish Balay   /* determine lens of each row */
103736121d4SSatish Balay   for (i=0; i<nrows; i++) {
104736121d4SSatish Balay     kstart  = ai[irow[i]];
105736121d4SSatish Balay     kend    = kstart + a->ilen[irow[i]];
106736121d4SSatish Balay     lens[i] = 0;
107736121d4SSatish Balay       for (k=kstart; k<kend; k++) {
108736121d4SSatish Balay         if (ssmap[aj[k]]) {
109736121d4SSatish Balay           lens[i]++;
110736121d4SSatish Balay         }
111736121d4SSatish Balay       }
112736121d4SSatish Balay     }
113736121d4SSatish Balay   /* Create and fill new matrix */
114736121d4SSatish Balay   if (scall == MAT_REUSE_MATRIX) {
115736121d4SSatish Balay     c = (Mat_SeqBAIJ *)((*B)->data);
116736121d4SSatish Balay 
11729bbc08cSBarry Smith     if (c->mbs!=nrows || c->nbs!=ncols || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
1180f5bd95cSBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
1190f5bd95cSBarry Smith     if (flag == PETSC_FALSE) {
12029bbc08cSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
121736121d4SSatish Balay     }
122549d3d68SSatish Balay     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
123736121d4SSatish Balay     C = *B;
1243a40ed3dSBarry Smith   } else {
125736121d4SSatish Balay     ierr = MatCreateSeqBAIJ(A->comm,bs,nrows*bs,ncols*bs,0,lens,&C);CHKERRQ(ierr);
126736121d4SSatish Balay   }
127736121d4SSatish Balay   c = (Mat_SeqBAIJ *)(C->data);
128736121d4SSatish Balay   for (i=0; i<nrows; i++) {
129736121d4SSatish Balay     row    = irow[i];
130736121d4SSatish Balay     kstart = ai[row];
131736121d4SSatish Balay     kend   = kstart + a->ilen[row];
132736121d4SSatish Balay     mat_i  = c->i[i];
133736121d4SSatish Balay     mat_j  = c->j + mat_i;
134218c64b6SSatish Balay     mat_a  = c->a + mat_i*bs2;
135736121d4SSatish Balay     mat_ilen = c->ilen + i;
136736121d4SSatish Balay     for (k=kstart; k<kend; k++) {
137736121d4SSatish Balay       if ((tcol=ssmap[a->j[k]])) {
138736121d4SSatish Balay         *mat_j++ = tcol - 1;
139549d3d68SSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
140549d3d68SSatish Balay         mat_a   += bs2;
141736121d4SSatish Balay         (*mat_ilen)++;
142736121d4SSatish Balay       }
143736121d4SSatish Balay     }
144736121d4SSatish Balay   }
145218c64b6SSatish Balay 
146736121d4SSatish Balay   /* Free work space */
147736121d4SSatish Balay   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
148606d414cSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
149606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1506d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1516d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
152736121d4SSatish Balay 
153736121d4SSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
154736121d4SSatish Balay   *B = C;
1553a40ed3dSBarry Smith   PetscFunctionReturn(0);
156736121d4SSatish Balay }
157736121d4SSatish Balay 
1584a2ae208SSatish Balay #undef __FUNCT__
1594a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
1607b2a1423SBarry Smith int MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
161218c64b6SSatish Balay {
162218c64b6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
163218c64b6SSatish Balay   IS          is1,is2;
164218c64b6SSatish Balay   int         *vary,*iary,*irow,*icol,nrows,ncols,i,ierr,bs=a->bs,count;
165218c64b6SSatish Balay 
1663a40ed3dSBarry Smith   PetscFunctionBegin;
167218c64b6SSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
168218c64b6SSatish Balay   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
169b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
170b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
171218c64b6SSatish Balay 
172218c64b6SSatish Balay   /* Verify if the indices corespond to each element in a block
173218c64b6SSatish Balay    and form the IS with compressed IS */
174b0a32e0cSBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
175218c64b6SSatish Balay   iary = vary + a->mbs;
176549d3d68SSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
177218c64b6SSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
178218c64b6SSatish Balay   count = 0;
179218c64b6SSatish Balay   for (i=0; i<a->mbs; i++) {
180ac355199SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
181218c64b6SSatish Balay     if (vary[i]==bs) iary[count++] = i;
182218c64b6SSatish Balay   }
183029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
184218c64b6SSatish Balay 
185549d3d68SSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
186218c64b6SSatish Balay   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
187218c64b6SSatish Balay   count = 0;
188218c64b6SSatish Balay   for (i=0; i<a->mbs; i++) {
189ac355199SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Internal error in PETSc");
190218c64b6SSatish Balay     if (vary[i]==bs) iary[count++] = i;
191218c64b6SSatish Balay   }
192029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
193218c64b6SSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
194218c64b6SSatish Balay   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
195606d414cSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
196218c64b6SSatish Balay 
1976a6a5d1dSBarry Smith   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
198218c64b6SSatish Balay   ISDestroy(is1);
199218c64b6SSatish Balay   ISDestroy(is2);
2003a40ed3dSBarry Smith   PetscFunctionReturn(0);
201218c64b6SSatish Balay }
202218c64b6SSatish Balay 
2034a2ae208SSatish Balay #undef __FUNCT__
2044a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
2057b2a1423SBarry Smith int MatGetSubMatrices_SeqBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B)
206736121d4SSatish Balay {
207736121d4SSatish Balay   int ierr,i;
208736121d4SSatish Balay 
2093a40ed3dSBarry Smith   PetscFunctionBegin;
210736121d4SSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
21182502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
212736121d4SSatish Balay   }
213736121d4SSatish Balay 
214736121d4SSatish Balay   for (i=0; i<n; i++) {
2156a6a5d1dSBarry Smith     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
216736121d4SSatish Balay   }
2173a40ed3dSBarry Smith   PetscFunctionReturn(0);
218736121d4SSatish Balay }
219218c64b6SSatish Balay 
220218c64b6SSatish Balay 
2212d61bbb3SSatish Balay /* -------------------------------------------------------*/
2222d61bbb3SSatish Balay /* Should check that shapes of vectors and matrices match */
2232d61bbb3SSatish Balay /* -------------------------------------------------------*/
224d9eff348SSatish Balay #include "petscblaslapack.h"
2252d61bbb3SSatish Balay 
2264a2ae208SSatish Balay #undef __FUNCT__
2274a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_1"
2282d61bbb3SSatish Balay int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
2292d61bbb3SSatish Balay {
2302d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
23187828ca2SBarry Smith   PetscScalar     *x,*z,sum;
2323f1db9ecSBarry Smith   MatScalar       *v;
233e1311b90SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
2342d61bbb3SSatish Balay 
2352d61bbb3SSatish Balay   PetscFunctionBegin;
236e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
237e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
2382d61bbb3SSatish Balay 
2392d61bbb3SSatish Balay   idx   = a->j;
2402d61bbb3SSatish Balay   v     = a->a;
2412d61bbb3SSatish Balay   ii    = a->i;
2422d61bbb3SSatish Balay 
2432d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
2442d61bbb3SSatish Balay     n    = ii[1] - ii[0]; ii++;
2452d61bbb3SSatish Balay     sum  = 0.0;
2462d61bbb3SSatish Balay     while (n--) sum += *v++ * x[*idx++];
2472d61bbb3SSatish Balay     z[i] = sum;
2482d61bbb3SSatish Balay   }
249e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
250e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
251b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz - A->m);
2522d61bbb3SSatish Balay   PetscFunctionReturn(0);
2532d61bbb3SSatish Balay }
2542d61bbb3SSatish Balay 
2554a2ae208SSatish Balay #undef __FUNCT__
2564a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_2"
2572d61bbb3SSatish Balay int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
2582d61bbb3SSatish Balay {
2592d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
26087828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2;
26187828ca2SBarry Smith   PetscScalar     x1,x2;
2623f1db9ecSBarry Smith   MatScalar       *v;
263e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
2642d61bbb3SSatish Balay 
2652d61bbb3SSatish Balay   PetscFunctionBegin;
266e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
267e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
2682d61bbb3SSatish Balay 
2692d61bbb3SSatish Balay   idx   = a->j;
2702d61bbb3SSatish Balay   v     = a->a;
2712d61bbb3SSatish Balay   ii    = a->i;
2722d61bbb3SSatish Balay 
2732d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
2742d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
2752d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0;
2762d61bbb3SSatish Balay     for (j=0; j<n; j++) {
2772d61bbb3SSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
2782d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
2792d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
2802d61bbb3SSatish Balay       v += 4;
2812d61bbb3SSatish Balay     }
2822d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2;
2832d61bbb3SSatish Balay     z += 2;
2842d61bbb3SSatish Balay   }
285e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
286e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
287b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - A->m);
2882d61bbb3SSatish Balay   PetscFunctionReturn(0);
2892d61bbb3SSatish Balay }
2902d61bbb3SSatish Balay 
2914a2ae208SSatish Balay #undef __FUNCT__
2924a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_3"
2932d61bbb3SSatish Balay int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
2942d61bbb3SSatish Balay {
2952d61bbb3SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
29687828ca2SBarry Smith   PetscScalar  *x,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
2973f1db9ecSBarry Smith   MatScalar    *v;
298e1311b90SBarry Smith   int          ierr,mbs=a->mbs,i,*idx,*ii,j,n;
2992d61bbb3SSatish Balay 
300aa482453SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
301fee21e36SBarry Smith #pragma disjoint(*v,*z,*xb)
302fee21e36SBarry Smith #endif
303fee21e36SBarry Smith 
3042d61bbb3SSatish Balay   PetscFunctionBegin;
305e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
306e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
3072d61bbb3SSatish Balay 
3082d61bbb3SSatish Balay   idx   = a->j;
3092d61bbb3SSatish Balay   v     = a->a;
3102d61bbb3SSatish Balay   ii    = a->i;
3112d61bbb3SSatish Balay 
3122d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
3132d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
3142d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
3152d61bbb3SSatish Balay     for (j=0; j<n; j++) {
3162d61bbb3SSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
3172d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
3182d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
3192d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
3202d61bbb3SSatish Balay       v += 9;
3212d61bbb3SSatish Balay     }
3222d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
3232d61bbb3SSatish Balay     z += 3;
3242d61bbb3SSatish Balay   }
325e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
326e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
327b0a32e0cSBarry Smith   PetscLogFlops(18*a->nz - A->m);
3282d61bbb3SSatish Balay   PetscFunctionReturn(0);
3292d61bbb3SSatish Balay }
3302d61bbb3SSatish Balay 
3314a2ae208SSatish Balay #undef __FUNCT__
3324a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_4"
3332d61bbb3SSatish Balay int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
3342d61bbb3SSatish Balay {
3352d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
33687828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
3373f1db9ecSBarry Smith   MatScalar       *v;
338e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
3392d61bbb3SSatish Balay 
3402d61bbb3SSatish Balay   PetscFunctionBegin;
341e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
342e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
3432d61bbb3SSatish Balay 
3442d61bbb3SSatish Balay   idx   = a->j;
3452d61bbb3SSatish Balay   v     = a->a;
3462d61bbb3SSatish Balay   ii    = a->i;
3472d61bbb3SSatish Balay 
3482d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
3492d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
3502d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
3512d61bbb3SSatish Balay     for (j=0; j<n; j++) {
3522d61bbb3SSatish Balay       xb = x + 4*(*idx++);
3532d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
3542d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
3552d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
3562d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
3572d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
3582d61bbb3SSatish Balay       v += 16;
3592d61bbb3SSatish Balay     }
3602d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
3612d61bbb3SSatish Balay     z += 4;
3622d61bbb3SSatish Balay   }
363e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
364e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
365b0a32e0cSBarry Smith   PetscLogFlops(32*a->nz - A->m);
3662d61bbb3SSatish Balay   PetscFunctionReturn(0);
3672d61bbb3SSatish Balay }
3682d61bbb3SSatish Balay 
3694a2ae208SSatish Balay #undef __FUNCT__
3704a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_5"
3712d61bbb3SSatish Balay int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
3722d61bbb3SSatish Balay {
3732d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
37487828ca2SBarry Smith   PetscScalar     sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z,*x;
3753f1db9ecSBarry Smith   MatScalar       *v;
376e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
3772d61bbb3SSatish Balay 
378433994e6SBarry Smith   PetscFunctionBegin;
379e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
380e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
3812d61bbb3SSatish Balay 
3822d61bbb3SSatish Balay   idx   = a->j;
3832d61bbb3SSatish Balay   v     = a->a;
3842d61bbb3SSatish Balay   ii    = a->i;
3852d61bbb3SSatish Balay 
3862d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
3872d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
3882d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
3892d61bbb3SSatish Balay     for (j=0; j<n; j++) {
3902d61bbb3SSatish Balay       xb = x + 5*(*idx++);
3912d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
3922d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
3932d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
3942d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
3952d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
3962d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
3972d61bbb3SSatish Balay       v += 25;
3982d61bbb3SSatish Balay     }
3992d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
4002d61bbb3SSatish Balay     z += 5;
4012d61bbb3SSatish Balay   }
402e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
403e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
404b0a32e0cSBarry Smith   PetscLogFlops(50*a->nz - A->m);
4052d61bbb3SSatish Balay   PetscFunctionReturn(0);
4062d61bbb3SSatish Balay }
4072d61bbb3SSatish Balay 
40815091d37SBarry Smith 
4094a2ae208SSatish Balay #undef __FUNCT__
4104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_6"
41115091d37SBarry Smith int MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
41215091d37SBarry Smith {
41315091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
41487828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
41587828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6;
41615091d37SBarry Smith   MatScalar       *v;
41715091d37SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
41815091d37SBarry Smith 
419433994e6SBarry Smith   PetscFunctionBegin;
42015091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
42115091d37SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
42215091d37SBarry Smith 
42315091d37SBarry Smith   idx   = a->j;
42415091d37SBarry Smith   v     = a->a;
42515091d37SBarry Smith   ii    = a->i;
42615091d37SBarry Smith 
42715091d37SBarry Smith   for (i=0; i<mbs; i++) {
42815091d37SBarry Smith     n  = ii[1] - ii[0]; ii++;
42915091d37SBarry Smith     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
43015091d37SBarry Smith     for (j=0; j<n; j++) {
43115091d37SBarry Smith       xb = x + 6*(*idx++);
43215091d37SBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
43315091d37SBarry Smith       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
43415091d37SBarry Smith       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
43515091d37SBarry Smith       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
43615091d37SBarry Smith       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
43715091d37SBarry Smith       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
43815091d37SBarry Smith       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
43915091d37SBarry Smith       v += 36;
44015091d37SBarry Smith     }
44115091d37SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
44215091d37SBarry Smith     z += 6;
44315091d37SBarry Smith   }
44415091d37SBarry Smith 
44515091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
44615091d37SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
447b0a32e0cSBarry Smith   PetscLogFlops(72*a->nz - A->m);
44815091d37SBarry Smith   PetscFunctionReturn(0);
44915091d37SBarry Smith }
4504a2ae208SSatish Balay #undef __FUNCT__
4514a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_7"
4522d61bbb3SSatish Balay int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
4532d61bbb3SSatish Balay {
4542d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
45587828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
45687828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
4573f1db9ecSBarry Smith   MatScalar       *v;
458e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
4592d61bbb3SSatish Balay 
460433994e6SBarry Smith   PetscFunctionBegin;
461e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
462e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
4632d61bbb3SSatish Balay 
4642d61bbb3SSatish Balay   idx   = a->j;
4652d61bbb3SSatish Balay   v     = a->a;
4662d61bbb3SSatish Balay   ii    = a->i;
4672d61bbb3SSatish Balay 
4682d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
4692d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
4702d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
4712d61bbb3SSatish Balay     for (j=0; j<n; j++) {
4722d61bbb3SSatish Balay       xb = x + 7*(*idx++);
4732d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
4742d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
4752d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
4762d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
4772d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
4782d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
4792d61bbb3SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
4802d61bbb3SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
4812d61bbb3SSatish Balay       v += 49;
4822d61bbb3SSatish Balay     }
4832d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
4842d61bbb3SSatish Balay     z += 7;
4852d61bbb3SSatish Balay   }
4862d61bbb3SSatish Balay 
487e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
488e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
489b0a32e0cSBarry Smith   PetscLogFlops(98*a->nz - A->m);
4902d61bbb3SSatish Balay   PetscFunctionReturn(0);
4912d61bbb3SSatish Balay }
4922d61bbb3SSatish Balay 
4933f1db9ecSBarry Smith /*
4943f1db9ecSBarry Smith     This will not work with MatScalar == float because it calls the BLAS
4953f1db9ecSBarry Smith */
4964a2ae208SSatish Balay #undef __FUNCT__
4974a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_N"
4982d61bbb3SSatish Balay int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
4992d61bbb3SSatish Balay {
5002d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
50187828ca2SBarry Smith   PetscScalar     *x,*z,*xb,*work,*workt;
5023f1db9ecSBarry Smith   MatScalar       *v;
503e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
5043f1db9ecSBarry Smith   int             ncols,k;
5052d61bbb3SSatish Balay 
5062d61bbb3SSatish Balay   PetscFunctionBegin;
507e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
508e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
5092d61bbb3SSatish Balay 
5102d61bbb3SSatish Balay   idx   = a->j;
5112d61bbb3SSatish Balay   v     = a->a;
5122d61bbb3SSatish Balay   ii    = a->i;
513218c64b6SSatish Balay 
514218c64b6SSatish Balay 
5152d61bbb3SSatish Balay   if (!a->mult_work) {
516273d9f13SBarry Smith     k    = PetscMax(A->m,A->n);
51787828ca2SBarry Smith     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
5182d61bbb3SSatish Balay   }
5192d61bbb3SSatish Balay   work = a->mult_work;
5202d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
5212d61bbb3SSatish Balay     n     = ii[1] - ii[0]; ii++;
5222d61bbb3SSatish Balay     ncols = n*bs;
5232d61bbb3SSatish Balay     workt = work;
5242d61bbb3SSatish Balay     for (j=0; j<n; j++) {
5252d61bbb3SSatish Balay       xb = x + bs*(*idx++);
5262d61bbb3SSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
5272d61bbb3SSatish Balay       workt += bs;
5282d61bbb3SSatish Balay     }
5293f1db9ecSBarry Smith     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
5303f1db9ecSBarry Smith     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
5312d61bbb3SSatish Balay     v += n*bs2;
5322d61bbb3SSatish Balay     z += bs;
5332d61bbb3SSatish Balay   }
534e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
536b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*bs2 - A->m);
5372d61bbb3SSatish Balay   PetscFunctionReturn(0);
5382d61bbb3SSatish Balay }
5392d61bbb3SSatish Balay 
5404a2ae208SSatish Balay #undef __FUNCT__
5414a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
5422d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
5432d61bbb3SSatish Balay {
5442d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
54587828ca2SBarry Smith   PetscScalar     *x,*y,*z,sum;
5463f1db9ecSBarry Smith   MatScalar       *v;
547e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,n;
5482d61bbb3SSatish Balay 
5492d61bbb3SSatish Balay   PetscFunctionBegin;
550e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
551e1311b90SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
5522e8a6d31SBarry Smith   if (zz != yy) {
553e1311b90SBarry Smith     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
5542e8a6d31SBarry Smith   } else {
5552e8a6d31SBarry Smith     z = y;
5562e8a6d31SBarry Smith   }
5572d61bbb3SSatish Balay 
5582d61bbb3SSatish Balay   idx   = a->j;
5592d61bbb3SSatish Balay   v     = a->a;
5602d61bbb3SSatish Balay   ii    = a->i;
5612d61bbb3SSatish Balay 
5622d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
5632d61bbb3SSatish Balay     n    = ii[1] - ii[0]; ii++;
5642d61bbb3SSatish Balay     sum  = y[i];
5652d61bbb3SSatish Balay     while (n--) sum += *v++ * x[*idx++];
5662d61bbb3SSatish Balay     z[i] = sum;
5672d61bbb3SSatish Balay   }
568e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
569e1311b90SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
5702e8a6d31SBarry Smith   if (zz != yy) {
571e1311b90SBarry Smith     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
5722e8a6d31SBarry Smith   }
573b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz);
5742d61bbb3SSatish Balay   PetscFunctionReturn(0);
5752d61bbb3SSatish Balay }
5762d61bbb3SSatish Balay 
5774a2ae208SSatish Balay #undef __FUNCT__
5784a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
5792d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
5802d61bbb3SSatish Balay {
5812d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
58287828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2;
58387828ca2SBarry Smith   PetscScalar     x1,x2;
5843f1db9ecSBarry Smith   MatScalar       *v;
585e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
5862d61bbb3SSatish Balay 
5872d61bbb3SSatish Balay   PetscFunctionBegin;
588e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
589e1311b90SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
5902e8a6d31SBarry Smith   if (zz != yy) {
591e1311b90SBarry Smith     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
5922e8a6d31SBarry Smith   } else {
5932e8a6d31SBarry Smith     z = y;
5942e8a6d31SBarry Smith   }
5952d61bbb3SSatish Balay 
5962d61bbb3SSatish Balay   idx   = a->j;
5972d61bbb3SSatish Balay   v     = a->a;
5982d61bbb3SSatish Balay   ii    = a->i;
5992d61bbb3SSatish Balay 
6002d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
6012d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
6022d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1];
6032d61bbb3SSatish Balay     for (j=0; j<n; j++) {
6042d61bbb3SSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
6052d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
6062d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
6072d61bbb3SSatish Balay       v += 4;
6082d61bbb3SSatish Balay     }
6092d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2;
6102d61bbb3SSatish Balay     z += 2; y += 2;
6112d61bbb3SSatish Balay   }
612e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
613e1311b90SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
6142e8a6d31SBarry Smith   if (zz != yy) {
615e1311b90SBarry Smith     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
6162e8a6d31SBarry Smith   }
617b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz);
6182d61bbb3SSatish Balay   PetscFunctionReturn(0);
6192d61bbb3SSatish Balay }
6202d61bbb3SSatish Balay 
6214a2ae208SSatish Balay #undef __FUNCT__
6224a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
6232d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
6242d61bbb3SSatish Balay {
6252d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
62687828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
6273f1db9ecSBarry Smith   MatScalar       *v;
628e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
6292d61bbb3SSatish Balay 
6302d61bbb3SSatish Balay   PetscFunctionBegin;
631e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
632e1311b90SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
6332e8a6d31SBarry Smith   if (zz != yy) {
634e1311b90SBarry Smith     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
6352e8a6d31SBarry Smith   } else {
6362e8a6d31SBarry Smith     z = y;
6372e8a6d31SBarry Smith   }
6382d61bbb3SSatish Balay 
6392d61bbb3SSatish Balay   idx   = a->j;
6402d61bbb3SSatish Balay   v     = a->a;
6412d61bbb3SSatish Balay   ii    = a->i;
6422d61bbb3SSatish Balay 
6432d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
6442d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
6452d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
6462d61bbb3SSatish Balay     for (j=0; j<n; j++) {
6472d61bbb3SSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
6482d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
6492d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
6502d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
6512d61bbb3SSatish Balay       v += 9;
6522d61bbb3SSatish Balay     }
6532d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
6542d61bbb3SSatish Balay     z += 3; y += 3;
6552d61bbb3SSatish Balay   }
656e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
657e1311b90SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
6582e8a6d31SBarry Smith   if (zz != yy) {
659e1311b90SBarry Smith     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
6602e8a6d31SBarry Smith   }
661b0a32e0cSBarry Smith   PetscLogFlops(18*a->nz);
6622d61bbb3SSatish Balay   PetscFunctionReturn(0);
6632d61bbb3SSatish Balay }
6642d61bbb3SSatish Balay 
6654a2ae208SSatish Balay #undef __FUNCT__
6664a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
6672d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
6682d61bbb3SSatish Balay {
6692d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
67087828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
6713f1db9ecSBarry Smith   MatScalar       *v;
672e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii;
6732d61bbb3SSatish Balay   int             j,n;
6742d61bbb3SSatish Balay 
6752d61bbb3SSatish Balay   PetscFunctionBegin;
676e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
677e1311b90SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
6782e8a6d31SBarry Smith   if (zz != yy) {
679e1311b90SBarry Smith     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
6802e8a6d31SBarry Smith   } else {
6812e8a6d31SBarry Smith     z = y;
6822e8a6d31SBarry Smith   }
6832d61bbb3SSatish Balay 
6842d61bbb3SSatish Balay   idx   = a->j;
6852d61bbb3SSatish Balay   v     = a->a;
6862d61bbb3SSatish Balay   ii    = a->i;
6872d61bbb3SSatish Balay 
6882d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
6892d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
6902d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
6912d61bbb3SSatish Balay     for (j=0; j<n; j++) {
6922d61bbb3SSatish Balay       xb = x + 4*(*idx++);
6932d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
6942d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
6952d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
6962d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
6972d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
6982d61bbb3SSatish Balay       v += 16;
6992d61bbb3SSatish Balay     }
7002d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
7012d61bbb3SSatish Balay     z += 4; y += 4;
7022d61bbb3SSatish Balay   }
703e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
704e1311b90SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
7052e8a6d31SBarry Smith   if (zz != yy) {
706e1311b90SBarry Smith     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
7072e8a6d31SBarry Smith   }
708b0a32e0cSBarry Smith   PetscLogFlops(32*a->nz);
7092d61bbb3SSatish Balay   PetscFunctionReturn(0);
7102d61bbb3SSatish Balay }
7112d61bbb3SSatish Balay 
7124a2ae208SSatish Balay #undef __FUNCT__
7134a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
7142d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
7152d61bbb3SSatish Balay {
7162d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
71787828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
7183f1db9ecSBarry Smith   MatScalar       *v;
719e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
7202d61bbb3SSatish Balay 
7212d61bbb3SSatish Balay   PetscFunctionBegin;
722e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723e1311b90SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
7242e8a6d31SBarry Smith   if (zz != yy) {
725e1311b90SBarry Smith     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
7262e8a6d31SBarry Smith   } else {
7272e8a6d31SBarry Smith     z = y;
7282e8a6d31SBarry Smith   }
7292d61bbb3SSatish Balay 
7302d61bbb3SSatish Balay   idx   = a->j;
7312d61bbb3SSatish Balay   v     = a->a;
7322d61bbb3SSatish Balay   ii    = a->i;
7332d61bbb3SSatish Balay 
7342d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
7352d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
7362d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
7372d61bbb3SSatish Balay     for (j=0; j<n; j++) {
7382d61bbb3SSatish Balay       xb = x + 5*(*idx++);
7392d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
7402d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
7412d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
7422d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
7432d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
7442d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
7452d61bbb3SSatish Balay       v += 25;
7462d61bbb3SSatish Balay     }
7472d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
7482d61bbb3SSatish Balay     z += 5; y += 5;
7492d61bbb3SSatish Balay   }
750e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
751e1311b90SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
7522e8a6d31SBarry Smith   if (zz != yy) {
753e1311b90SBarry Smith     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
7542e8a6d31SBarry Smith   }
755b0a32e0cSBarry Smith   PetscLogFlops(50*a->nz);
7562d61bbb3SSatish Balay   PetscFunctionReturn(0);
7572d61bbb3SSatish Balay }
7584a2ae208SSatish Balay #undef __FUNCT__
7594a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
76015091d37SBarry Smith int MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
76115091d37SBarry Smith {
76215091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
76387828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
76487828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6;
76515091d37SBarry Smith   MatScalar       *v;
76615091d37SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
76715091d37SBarry Smith 
76815091d37SBarry Smith   PetscFunctionBegin;
76915091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
77015091d37SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
77115091d37SBarry Smith   if (zz != yy) {
77215091d37SBarry Smith     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
77315091d37SBarry Smith   } else {
77415091d37SBarry Smith     z = y;
77515091d37SBarry Smith   }
77615091d37SBarry Smith 
77715091d37SBarry Smith   idx   = a->j;
77815091d37SBarry Smith   v     = a->a;
77915091d37SBarry Smith   ii    = a->i;
78015091d37SBarry Smith 
78115091d37SBarry Smith   for (i=0; i<mbs; i++) {
78215091d37SBarry Smith     n  = ii[1] - ii[0]; ii++;
78315091d37SBarry Smith     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
78415091d37SBarry Smith     for (j=0; j<n; j++) {
7853b95cb0eSSatish Balay       xb = x + 6*(*idx++);
78615091d37SBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
78715091d37SBarry Smith       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
78815091d37SBarry Smith       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
78915091d37SBarry Smith       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
79015091d37SBarry Smith       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
79115091d37SBarry Smith       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
79215091d37SBarry Smith       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
79315091d37SBarry Smith       v += 36;
79415091d37SBarry Smith     }
79515091d37SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
79615091d37SBarry Smith     z += 6; y += 6;
79715091d37SBarry Smith   }
79815091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
79915091d37SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
80015091d37SBarry Smith   if (zz != yy) {
80115091d37SBarry Smith     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
80215091d37SBarry Smith   }
803b0a32e0cSBarry Smith   PetscLogFlops(72*a->nz);
80415091d37SBarry Smith   PetscFunctionReturn(0);
80515091d37SBarry Smith }
8062d61bbb3SSatish Balay 
8074a2ae208SSatish Balay #undef __FUNCT__
8084a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
8092d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
8102d61bbb3SSatish Balay {
8112d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
81287828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
81387828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
8143f1db9ecSBarry Smith   MatScalar       *v;
815e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
8162d61bbb3SSatish Balay 
8172d61bbb3SSatish Balay   PetscFunctionBegin;
818e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819e1311b90SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8202e8a6d31SBarry Smith   if (zz != yy) {
821e1311b90SBarry Smith     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
8222e8a6d31SBarry Smith   } else {
8232e8a6d31SBarry Smith     z = y;
8242e8a6d31SBarry Smith   }
8252d61bbb3SSatish Balay 
8262d61bbb3SSatish Balay   idx   = a->j;
8272d61bbb3SSatish Balay   v     = a->a;
8282d61bbb3SSatish Balay   ii    = a->i;
8292d61bbb3SSatish Balay 
8302d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
8312d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
8322d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
8332d61bbb3SSatish Balay     for (j=0; j<n; j++) {
8342d61bbb3SSatish Balay       xb = x + 7*(*idx++);
8352d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
8362d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
8372d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
8382d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
8392d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
8402d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
8412d61bbb3SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
8422d61bbb3SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
8432d61bbb3SSatish Balay       v += 49;
8442d61bbb3SSatish Balay     }
8452d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
8462d61bbb3SSatish Balay     z += 7; y += 7;
8472d61bbb3SSatish Balay   }
848e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
849e1311b90SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8502e8a6d31SBarry Smith   if (zz != yy) {
851e1311b90SBarry Smith     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
8522e8a6d31SBarry Smith   }
853b0a32e0cSBarry Smith   PetscLogFlops(98*a->nz);
8542d61bbb3SSatish Balay   PetscFunctionReturn(0);
8552d61bbb3SSatish Balay }
856218c64b6SSatish Balay 
8574a2ae208SSatish Balay #undef __FUNCT__
8584a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
8592d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
8602d61bbb3SSatish Balay {
8612d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
86287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,*work,*workt;
8633f1db9ecSBarry Smith   MatScalar      *v;
8642d61bbb3SSatish Balay   int            mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
8653f1db9ecSBarry Smith   int            ncols,k;
866218c64b6SSatish Balay 
8672d61bbb3SSatish Balay   PetscFunctionBegin;
8682d61bbb3SSatish Balay   if (xx != yy) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
8692d61bbb3SSatish Balay 
870e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
871e1311b90SBarry Smith   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
8722d61bbb3SSatish Balay 
8732d61bbb3SSatish Balay   idx   = a->j;
8742d61bbb3SSatish Balay   v     = a->a;
8752d61bbb3SSatish Balay   ii    = a->i;
8762d61bbb3SSatish Balay 
8772d61bbb3SSatish Balay 
8782d61bbb3SSatish Balay   if (!a->mult_work) {
879273d9f13SBarry Smith     k    = PetscMax(A->m,A->n);
88087828ca2SBarry Smith     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
8812d61bbb3SSatish Balay   }
8822d61bbb3SSatish Balay   work = a->mult_work;
8832d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
8842d61bbb3SSatish Balay     n     = ii[1] - ii[0]; ii++;
8852d61bbb3SSatish Balay     ncols = n*bs;
8862d61bbb3SSatish Balay     workt = work;
8872d61bbb3SSatish Balay     for (j=0; j<n; j++) {
8882d61bbb3SSatish Balay       xb = x + bs*(*idx++);
8892d61bbb3SSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
8902d61bbb3SSatish Balay       workt += bs;
8912d61bbb3SSatish Balay     }
8923f1db9ecSBarry Smith     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
8933f1db9ecSBarry Smith     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
8942d61bbb3SSatish Balay     v += n*bs2;
8952d61bbb3SSatish Balay     z += bs;
8962d61bbb3SSatish Balay   }
897e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
898e1311b90SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
899b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*bs2);
9002d61bbb3SSatish Balay   PetscFunctionReturn(0);
9012d61bbb3SSatish Balay }
9022d61bbb3SSatish Balay 
9034a2ae208SSatish Balay #undef __FUNCT__
9044a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
9057c922b88SBarry Smith int MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
9062d61bbb3SSatish Balay {
9072d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
90887828ca2SBarry Smith   PetscScalar     *xg,*zg,*zb,zero = 0.0;
90987828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
9103f1db9ecSBarry Smith   MatScalar       *v;
911f1af5d2fSBarry Smith   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
9122d61bbb3SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
9132d61bbb3SSatish Balay 
9142d61bbb3SSatish Balay   PetscFunctionBegin;
915f1af5d2fSBarry Smith   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
916e1311b90SBarry Smith   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
917e1311b90SBarry Smith   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
9182d61bbb3SSatish Balay 
9192d61bbb3SSatish Balay   idx   = a->j;
9202d61bbb3SSatish Balay   v     = a->a;
9212d61bbb3SSatish Balay   ii    = a->i;
922f1af5d2fSBarry Smith   xb    = x;
9232d61bbb3SSatish Balay   switch (bs) {
9242d61bbb3SSatish Balay   case 1:
9252d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9262d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
927f1af5d2fSBarry Smith       x1 = xb[0];
9282d61bbb3SSatish Balay       ib = idx + ai[i];
9292d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9302d61bbb3SSatish Balay         rval    = ib[j];
931f1af5d2fSBarry Smith         z[rval] += *v * x1;
932f1af5d2fSBarry Smith         v++;
9332d61bbb3SSatish Balay       }
934f1af5d2fSBarry Smith       xb++;
9352d61bbb3SSatish Balay     }
9362d61bbb3SSatish Balay     break;
9372d61bbb3SSatish Balay   case 2:
9382d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9392d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
940f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1];
9412d61bbb3SSatish Balay       ib = idx + ai[i];
9422d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9432d61bbb3SSatish Balay         rval      = ib[j]*2;
9442d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
945f1af5d2fSBarry Smith         z[rval]   += v[2]*x1 + v[3]*x2;
9462d61bbb3SSatish Balay         v  += 4;
9472d61bbb3SSatish Balay       }
948f1af5d2fSBarry Smith       xb += 2;
9492d61bbb3SSatish Balay     }
9502d61bbb3SSatish Balay     break;
9512d61bbb3SSatish Balay   case 3:
9522d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9532d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
954f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
9552d61bbb3SSatish Balay       ib = idx + ai[i];
9562d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9572d61bbb3SSatish Balay         rval      = ib[j]*3;
9582d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
9592d61bbb3SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
960f1af5d2fSBarry Smith         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
9612d61bbb3SSatish Balay         v  += 9;
9622d61bbb3SSatish Balay       }
963f1af5d2fSBarry Smith       xb += 3;
9642d61bbb3SSatish Balay     }
9652d61bbb3SSatish Balay     break;
9662d61bbb3SSatish Balay   case 4:
9672d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9682d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
969f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
9702d61bbb3SSatish Balay       ib = idx + ai[i];
9712d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9722d61bbb3SSatish Balay         rval      = ib[j]*4;
9732d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
9742d61bbb3SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
9752d61bbb3SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
976f1af5d2fSBarry Smith         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
9772d61bbb3SSatish Balay         v  += 16;
9782d61bbb3SSatish Balay       }
979f1af5d2fSBarry Smith       xb += 4;
9802d61bbb3SSatish Balay     }
9812d61bbb3SSatish Balay     break;
9822d61bbb3SSatish Balay   case 5:
9832d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9842d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
985f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
9862d61bbb3SSatish Balay       x4 = xb[3]; x5 = xb[4];
9872d61bbb3SSatish Balay       ib = idx + ai[i];
9882d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9892d61bbb3SSatish Balay         rval      = ib[j]*5;
9902d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
9912d61bbb3SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
9922d61bbb3SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
9932d61bbb3SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
994f1af5d2fSBarry Smith         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
9952d61bbb3SSatish Balay         v  += 25;
9962d61bbb3SSatish Balay       }
997f1af5d2fSBarry Smith       xb += 5;
9982d61bbb3SSatish Balay     }
9992d61bbb3SSatish Balay     break;
1000f1af5d2fSBarry Smith   case 6:
1001f1af5d2fSBarry Smith     for (i=0; i<mbs; i++) {
1002f1af5d2fSBarry Smith       n  = ii[1] - ii[0]; ii++;
1003f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1004f1af5d2fSBarry Smith       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1005f1af5d2fSBarry Smith       ib = idx + ai[i];
1006f1af5d2fSBarry Smith       for (j=0; j<n; j++) {
1007f1af5d2fSBarry Smith         rval      = ib[j]*6;
1008f1af5d2fSBarry Smith         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
1009f1af5d2fSBarry Smith         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
1010f1af5d2fSBarry Smith         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
1011f1af5d2fSBarry Smith         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
1012f1af5d2fSBarry Smith         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
1013f1af5d2fSBarry Smith         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
1014f1af5d2fSBarry Smith         v  += 36;
1015f1af5d2fSBarry Smith       }
1016f1af5d2fSBarry Smith       xb += 6;
1017f1af5d2fSBarry Smith     }
1018f1af5d2fSBarry Smith     break;
1019f1af5d2fSBarry Smith   case 7:
1020f1af5d2fSBarry Smith     for (i=0; i<mbs; i++) {
1021f1af5d2fSBarry Smith       n  = ii[1] - ii[0]; ii++;
1022f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1023f1af5d2fSBarry Smith       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1024f1af5d2fSBarry Smith       ib = idx + ai[i];
1025f1af5d2fSBarry Smith       for (j=0; j<n; j++) {
1026f1af5d2fSBarry Smith         rval      = ib[j]*7;
1027f1af5d2fSBarry Smith         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
1028f1af5d2fSBarry Smith         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1029f1af5d2fSBarry Smith         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1030f1af5d2fSBarry Smith         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1031f1af5d2fSBarry Smith         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1032f1af5d2fSBarry Smith         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1033f1af5d2fSBarry Smith         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1034f1af5d2fSBarry Smith         v  += 49;
1035f1af5d2fSBarry Smith       }
1036f1af5d2fSBarry Smith       xb += 7;
1037f1af5d2fSBarry Smith     }
1038f1af5d2fSBarry Smith     break;
1039f1af5d2fSBarry Smith   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
10403f1db9ecSBarry Smith       int          ncols,k;
104187828ca2SBarry Smith       PetscScalar  *work,*workt;
10423f1db9ecSBarry Smith 
10432d61bbb3SSatish Balay       if (!a->mult_work) {
1044273d9f13SBarry Smith         k = PetscMax(A->m,A->n);
104587828ca2SBarry Smith         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
10462d61bbb3SSatish Balay       }
10472d61bbb3SSatish Balay       work = a->mult_work;
10482d61bbb3SSatish Balay       for (i=0; i<mbs; i++) {
10492d61bbb3SSatish Balay         n     = ii[1] - ii[0]; ii++;
10502d61bbb3SSatish Balay         ncols = n*bs;
105187828ca2SBarry Smith         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1052d824769bSBarry Smith         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
10533f1db9ecSBarry Smith         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
10542d61bbb3SSatish Balay         v += n*bs2;
10552d61bbb3SSatish Balay         x += bs;
10562d61bbb3SSatish Balay         workt = work;
10572d61bbb3SSatish Balay         for (j=0; j<n; j++) {
10582d61bbb3SSatish Balay           zb = z + bs*(*idx++);
10592d61bbb3SSatish Balay           for (k=0; k<bs; k++) zb[k] += workt[k] ;
10602d61bbb3SSatish Balay           workt += bs;
10612d61bbb3SSatish Balay         }
10622d61bbb3SSatish Balay       }
10632d61bbb3SSatish Balay     }
10642d61bbb3SSatish Balay   }
10652d61bbb3SSatish Balay   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
10662d61bbb3SSatish Balay   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1067b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*a->bs2 - A->n);
10682d61bbb3SSatish Balay   PetscFunctionReturn(0);
10692d61bbb3SSatish Balay }
10702d61bbb3SSatish Balay 
10714a2ae208SSatish Balay #undef __FUNCT__
10724a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
10737c922b88SBarry Smith int MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
10742d61bbb3SSatish Balay 
10752d61bbb3SSatish Balay {
10762d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
107787828ca2SBarry Smith   PetscScalar     *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
10783f1db9ecSBarry Smith   MatScalar       *v;
1079f1af5d2fSBarry Smith   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
10802d61bbb3SSatish Balay 
10812d61bbb3SSatish Balay   PetscFunctionBegin;
1082*ef66eb69SBarry Smith   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1083e1311b90SBarry Smith   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
1084e1311b90SBarry Smith   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
10852d61bbb3SSatish Balay 
10862d61bbb3SSatish Balay 
10872d61bbb3SSatish Balay   idx   = a->j;
10882d61bbb3SSatish Balay   v     = a->a;
10892d61bbb3SSatish Balay   ii    = a->i;
1090f1af5d2fSBarry Smith   xb    = x;
10912d61bbb3SSatish Balay 
10922d61bbb3SSatish Balay   switch (bs) {
10932d61bbb3SSatish Balay   case 1:
10942d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
10952d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1096f1af5d2fSBarry Smith       x1 = xb[0];
10972d61bbb3SSatish Balay       ib = idx + ai[i];
10982d61bbb3SSatish Balay       for (j=0; j<n; j++) {
10992d61bbb3SSatish Balay         rval    = ib[j];
1100f1af5d2fSBarry Smith         z[rval] += *v * x1;
1101f1af5d2fSBarry Smith         v++;
11022d61bbb3SSatish Balay       }
1103f1af5d2fSBarry Smith       xb++;
11042d61bbb3SSatish Balay     }
11052d61bbb3SSatish Balay     break;
11062d61bbb3SSatish Balay   case 2:
11072d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11082d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1109f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1];
11102d61bbb3SSatish Balay       ib = idx + ai[i];
11112d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11122d61bbb3SSatish Balay         rval      = ib[j]*2;
11132d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
11142d61bbb3SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
11152d61bbb3SSatish Balay         v  += 4;
11162d61bbb3SSatish Balay       }
1117f1af5d2fSBarry Smith       xb += 2;
11182d61bbb3SSatish Balay     }
11192d61bbb3SSatish Balay     break;
11202d61bbb3SSatish Balay   case 3:
11212d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11222d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1123f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
11242d61bbb3SSatish Balay       ib = idx + ai[i];
11252d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11262d61bbb3SSatish Balay         rval      = ib[j]*3;
11272d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
11282d61bbb3SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
11292d61bbb3SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
11302d61bbb3SSatish Balay         v  += 9;
11312d61bbb3SSatish Balay       }
1132f1af5d2fSBarry Smith       xb += 3;
11332d61bbb3SSatish Balay     }
11342d61bbb3SSatish Balay     break;
11352d61bbb3SSatish Balay   case 4:
11362d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11372d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1138f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
11392d61bbb3SSatish Balay       ib = idx + ai[i];
11402d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11412d61bbb3SSatish Balay         rval      = ib[j]*4;
11422d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
11432d61bbb3SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
11442d61bbb3SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
11452d61bbb3SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
11462d61bbb3SSatish Balay         v  += 16;
11472d61bbb3SSatish Balay       }
1148f1af5d2fSBarry Smith       xb += 4;
11492d61bbb3SSatish Balay     }
11502d61bbb3SSatish Balay     break;
11512d61bbb3SSatish Balay   case 5:
11522d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11532d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1154f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
11552d61bbb3SSatish Balay       x4 = xb[3]; x5 = xb[4];
11562d61bbb3SSatish Balay       ib = idx + ai[i];
11572d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11582d61bbb3SSatish Balay         rval      = ib[j]*5;
11592d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
11602d61bbb3SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
11612d61bbb3SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
11622d61bbb3SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
11632d61bbb3SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
11642d61bbb3SSatish Balay         v  += 25;
11652d61bbb3SSatish Balay       }
1166f1af5d2fSBarry Smith       xb += 5;
11672d61bbb3SSatish Balay     }
11682d61bbb3SSatish Balay     break;
1169f1af5d2fSBarry Smith   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
11703f1db9ecSBarry Smith       int          ncols,k;
117187828ca2SBarry Smith       PetscScalar  *work,*workt;
11723f1db9ecSBarry Smith 
11732d61bbb3SSatish Balay       if (!a->mult_work) {
1174273d9f13SBarry Smith         k = PetscMax(A->m,A->n);
117587828ca2SBarry Smith         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
11762d61bbb3SSatish Balay       }
11772d61bbb3SSatish Balay       work = a->mult_work;
11782d61bbb3SSatish Balay       for (i=0; i<mbs; i++) {
11792d61bbb3SSatish Balay         n     = ii[1] - ii[0]; ii++;
11802d61bbb3SSatish Balay         ncols = n*bs;
118187828ca2SBarry Smith         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
11823f1db9ecSBarry Smith         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
11833f1db9ecSBarry Smith         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
11842d61bbb3SSatish Balay         v += n*bs2;
11852d61bbb3SSatish Balay         x += bs;
11862d61bbb3SSatish Balay         workt = work;
11872d61bbb3SSatish Balay         for (j=0; j<n; j++) {
11882d61bbb3SSatish Balay           zb = z + bs*(*idx++);
11892d61bbb3SSatish Balay           for (k=0; k<bs; k++) zb[k] += workt[k] ;
11902d61bbb3SSatish Balay           workt += bs;
11912d61bbb3SSatish Balay         }
11922d61bbb3SSatish Balay       }
11932d61bbb3SSatish Balay     }
11942d61bbb3SSatish Balay   }
11952d61bbb3SSatish Balay   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
11962d61bbb3SSatish Balay   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1197b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*a->bs2);
11982d61bbb3SSatish Balay   PetscFunctionReturn(0);
11992d61bbb3SSatish Balay }
12002d61bbb3SSatish Balay 
12014a2ae208SSatish Balay #undef __FUNCT__
12024a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqBAIJ"
120387828ca2SBarry Smith int MatScale_SeqBAIJ(PetscScalar *alpha,Mat inA)
12042d61bbb3SSatish Balay {
12052d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
120687052302SDinesh Kaushik   int         totalnz = a->bs2*a->nz;
12073eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12083eda8832SBarry Smith   int         i;
120987052302SDinesh Kaushik #else
121087052302SDinesh Kaushik   int         one = 1;
12113eda8832SBarry Smith #endif
12122d61bbb3SSatish Balay 
12132d61bbb3SSatish Balay   PetscFunctionBegin;
12143eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12153eda8832SBarry Smith   for (i=0; i<totalnz; i++) a->a[i] *= *alpha;
12163eda8832SBarry Smith #else
12172d61bbb3SSatish Balay   BLscal_(&totalnz,alpha,a->a,&one);
12183eda8832SBarry Smith #endif
1219b0a32e0cSBarry Smith   PetscLogFlops(totalnz);
12202d61bbb3SSatish Balay   PetscFunctionReturn(0);
12212d61bbb3SSatish Balay }
12222d61bbb3SSatish Balay 
12234a2ae208SSatish Balay #undef __FUNCT__
12244a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqBAIJ"
1225329f5518SBarry Smith int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
12262d61bbb3SSatish Balay {
12272d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
12283f1db9ecSBarry Smith   MatScalar   *v = a->a;
1229329f5518SBarry Smith   PetscReal   sum = 0.0;
12300e90e235SBarry Smith   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
12312d61bbb3SSatish Balay 
12322d61bbb3SSatish Balay   PetscFunctionBegin;
12332d61bbb3SSatish Balay   if (type == NORM_FROBENIUS) {
12342d61bbb3SSatish Balay     for (i=0; i< bs2*nz; i++) {
1235aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
1236329f5518SBarry Smith       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
12372d61bbb3SSatish Balay #else
12382d61bbb3SSatish Balay       sum += (*v)*(*v); v++;
12392d61bbb3SSatish Balay #endif
12402d61bbb3SSatish Balay     }
12412d61bbb3SSatish Balay     *norm = sqrt(sum);
1242596552b5SBarry Smith   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1243596552b5SBarry Smith     *norm = 0.0;
1244596552b5SBarry Smith     for (k=0; k<bs; k++) {
124574f84c7bSSatish Balay       for (j=0; j<a->mbs; j++) {
1246596552b5SBarry Smith         v = a->a + bs2*a->i[j] + k;
1247596552b5SBarry Smith         sum = 0.0;
1248596552b5SBarry Smith         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
12490e90e235SBarry Smith           for (k1=0; k1<bs; k1++){
1250596552b5SBarry Smith             sum += PetscAbsScalar(*v);
1251596552b5SBarry Smith             v   += bs;
12522d61bbb3SSatish Balay           }
12530e90e235SBarry Smith         }
1254596552b5SBarry Smith         if (sum > *norm) *norm = sum;
1255596552b5SBarry Smith       }
1256596552b5SBarry Smith     }
1257596552b5SBarry Smith   } else {
125829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
12592d61bbb3SSatish Balay   }
12602d61bbb3SSatish Balay   PetscFunctionReturn(0);
12612d61bbb3SSatish Balay }
12622d61bbb3SSatish Balay 
12632d61bbb3SSatish Balay 
12644a2ae208SSatish Balay #undef __FUNCT__
12654a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqBAIJ"
12662d61bbb3SSatish Balay int MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
12672d61bbb3SSatish Balay {
12682d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
12690f5bd95cSBarry Smith   int         ierr;
1270273d9f13SBarry Smith   PetscTruth  flag;
12712d61bbb3SSatish Balay 
12722d61bbb3SSatish Balay   PetscFunctionBegin;
1273273d9f13SBarry Smith   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flag);CHKERRQ(ierr);
1274273d9f13SBarry Smith   if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
12752d61bbb3SSatish Balay 
12762d61bbb3SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1277273d9f13SBarry Smith   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1278273d9f13SBarry Smith     *flg = PETSC_FALSE;
1279273d9f13SBarry Smith     PetscFunctionReturn(0);
12802d61bbb3SSatish Balay   }
12812d61bbb3SSatish Balay 
12822d61bbb3SSatish Balay   /* if the a->i are the same */
12830f5bd95cSBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
12840f5bd95cSBarry Smith   if (*flg == PETSC_FALSE) {
12850f5bd95cSBarry Smith     PetscFunctionReturn(0);
12862d61bbb3SSatish Balay   }
12872d61bbb3SSatish Balay 
12882d61bbb3SSatish Balay   /* if a->j are the same */
12890f5bd95cSBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
12900f5bd95cSBarry Smith   if (*flg == PETSC_FALSE) {
12910f5bd95cSBarry Smith     PetscFunctionReturn(0);
12922d61bbb3SSatish Balay   }
12932d61bbb3SSatish Balay   /* if a->a are the same */
129487828ca2SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
12952d61bbb3SSatish Balay   PetscFunctionReturn(0);
12962d61bbb3SSatish Balay 
12972d61bbb3SSatish Balay }
12982d61bbb3SSatish Balay 
12994a2ae208SSatish Balay #undef __FUNCT__
13004a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
13012d61bbb3SSatish Balay int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
13022d61bbb3SSatish Balay {
13032d61bbb3SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1304e1311b90SBarry Smith   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
130587828ca2SBarry Smith   PetscScalar  *x,zero = 0.0;
13063f1db9ecSBarry Smith   MatScalar    *aa,*aa_j;
13072d61bbb3SSatish Balay 
13082d61bbb3SSatish Balay   PetscFunctionBegin;
130929bbc08cSBarry Smith   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
13102d61bbb3SSatish Balay   bs   = a->bs;
13112d61bbb3SSatish Balay   aa   = a->a;
13122d61bbb3SSatish Balay   ai   = a->i;
13132d61bbb3SSatish Balay   aj   = a->j;
13142d61bbb3SSatish Balay   ambs = a->mbs;
13152d61bbb3SSatish Balay   bs2  = a->bs2;
13162d61bbb3SSatish Balay 
1317e1311b90SBarry Smith   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1318e1311b90SBarry Smith   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1319e1311b90SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1320273d9f13SBarry Smith   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
13212d61bbb3SSatish Balay   for (i=0; i<ambs; i++) {
13222d61bbb3SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
13232d61bbb3SSatish Balay       if (aj[j] == i) {
13242d61bbb3SSatish Balay         row  = i*bs;
13252d61bbb3SSatish Balay         aa_j = aa+j*bs2;
13262d61bbb3SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
13272d61bbb3SSatish Balay         break;
13282d61bbb3SSatish Balay       }
13292d61bbb3SSatish Balay     }
13302d61bbb3SSatish Balay   }
133115091d37SBarry Smith   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
13322d61bbb3SSatish Balay   PetscFunctionReturn(0);
13332d61bbb3SSatish Balay }
13342d61bbb3SSatish Balay 
13354a2ae208SSatish Balay #undef __FUNCT__
13364a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
13372d61bbb3SSatish Balay int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
13382d61bbb3SSatish Balay {
13392d61bbb3SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
134087828ca2SBarry Smith   PetscScalar  *l,*r,x,*li,*ri;
13413f1db9ecSBarry Smith   MatScalar    *aa,*v;
1342e1311b90SBarry Smith   int          ierr,i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
13432d61bbb3SSatish Balay 
13442d61bbb3SSatish Balay   PetscFunctionBegin;
13452d61bbb3SSatish Balay   ai  = a->i;
13462d61bbb3SSatish Balay   aj  = a->j;
13472d61bbb3SSatish Balay   aa  = a->a;
1348273d9f13SBarry Smith   m   = A->m;
1349273d9f13SBarry Smith   n   = A->n;
13502d61bbb3SSatish Balay   bs  = a->bs;
13512d61bbb3SSatish Balay   mbs = a->mbs;
13522d61bbb3SSatish Balay   bs2 = a->bs2;
13532d61bbb3SSatish Balay   if (ll) {
1354e1311b90SBarry Smith     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1355e1311b90SBarry Smith     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
135629bbc08cSBarry Smith     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
13572d61bbb3SSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
13582d61bbb3SSatish Balay       M  = ai[i+1] - ai[i];
13592d61bbb3SSatish Balay       li = l + i*bs;
13602d61bbb3SSatish Balay       v  = aa + bs2*ai[i];
13612d61bbb3SSatish Balay       for (j=0; j<M; j++) { /* for each block */
13622d61bbb3SSatish Balay         for (k=0; k<bs2; k++) {
13632d61bbb3SSatish Balay           (*v++) *= li[k%bs];
13642d61bbb3SSatish Balay         }
13652d61bbb3SSatish Balay       }
13662d61bbb3SSatish Balay     }
136760d550acSSatish Balay     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1368b0a32e0cSBarry Smith     PetscLogFlops(a->nz);
13692d61bbb3SSatish Balay   }
13702d61bbb3SSatish Balay 
13712d61bbb3SSatish Balay   if (rr) {
1372e1311b90SBarry Smith     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1373e1311b90SBarry Smith     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
137429bbc08cSBarry Smith     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
13752d61bbb3SSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
13762d61bbb3SSatish Balay       M  = ai[i+1] - ai[i];
13772d61bbb3SSatish Balay       v  = aa + bs2*ai[i];
13782d61bbb3SSatish Balay       for (j=0; j<M; j++) { /* for each block */
13792d61bbb3SSatish Balay         ri = r + bs*aj[ai[i]+j];
13802d61bbb3SSatish Balay         for (k=0; k<bs; k++) {
13812d61bbb3SSatish Balay           x = ri[k];
13822d61bbb3SSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
13832d61bbb3SSatish Balay         }
13842d61bbb3SSatish Balay       }
13852d61bbb3SSatish Balay     }
138660d550acSSatish Balay     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1387b0a32e0cSBarry Smith     PetscLogFlops(a->nz);
13882d61bbb3SSatish Balay   }
13892d61bbb3SSatish Balay   PetscFunctionReturn(0);
13902d61bbb3SSatish Balay }
13912d61bbb3SSatish Balay 
13922d61bbb3SSatish Balay 
13934a2ae208SSatish Balay #undef __FUNCT__
13944a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqBAIJ"
13952d61bbb3SSatish Balay int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
13962d61bbb3SSatish Balay {
13972d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
13982d61bbb3SSatish Balay 
13992d61bbb3SSatish Balay   PetscFunctionBegin;
1400273d9f13SBarry Smith   info->rows_global    = (double)A->m;
1401273d9f13SBarry Smith   info->columns_global = (double)A->n;
1402273d9f13SBarry Smith   info->rows_local     = (double)A->m;
1403273d9f13SBarry Smith   info->columns_local  = (double)A->n;
14042d61bbb3SSatish Balay   info->block_size     = a->bs2;
14052d61bbb3SSatish Balay   info->nz_allocated   = a->maxnz;
14062d61bbb3SSatish Balay   info->nz_used        = a->bs2*a->nz;
14072d61bbb3SSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
14082d61bbb3SSatish Balay   info->assemblies   = A->num_ass;
14092d61bbb3SSatish Balay   info->mallocs      = a->reallocs;
14102d61bbb3SSatish Balay   info->memory       = A->mem;
14112d61bbb3SSatish Balay   if (A->factor) {
14122d61bbb3SSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
14132d61bbb3SSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
14142d61bbb3SSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
14152d61bbb3SSatish Balay   } else {
14162d61bbb3SSatish Balay     info->fill_ratio_given  = 0;
14172d61bbb3SSatish Balay     info->fill_ratio_needed = 0;
14182d61bbb3SSatish Balay     info->factor_mallocs    = 0;
14192d61bbb3SSatish Balay   }
14202d61bbb3SSatish Balay   PetscFunctionReturn(0);
14212d61bbb3SSatish Balay }
14222d61bbb3SSatish Balay 
14232d61bbb3SSatish Balay 
14244a2ae208SSatish Balay #undef __FUNCT__
14254a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
14262d61bbb3SSatish Balay int MatZeroEntries_SeqBAIJ(Mat A)
14272d61bbb3SSatish Balay {
14282d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1429549d3d68SSatish Balay   int         ierr;
14302d61bbb3SSatish Balay 
14312d61bbb3SSatish Balay   PetscFunctionBegin;
1432549d3d68SSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
14332d61bbb3SSatish Balay   PetscFunctionReturn(0);
14342d61bbb3SSatish Balay }
1435