xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision b1d4fb267e72f6b087ed013bbfbbee1418c3f503)
173f4d377SMatthew Knepley /*$Id: baij2.c,v 1.75 2001/09/07 20:09:49 bsmith Exp $*/
2cac129eeSSatish Balay 
370f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
42d61bbb3SSatish Balay #include "src/inline/spops.h"
53f1db9ecSBarry Smith #include "src/inline/ilu.h"
60a835dfdSSatish Balay #include "petscbt.h"
7cac129eeSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
10268466fbSBarry Smith int MatIncreaseOverlap_SeqBAIJ(Mat A,int is_max,IS is[],int ov)
11a3192f15SSatish Balay {
12a3192f15SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
13218c64b6SSatish Balay   int         row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival;
14218c64b6SSatish Balay   int         start,end,*ai,*aj,bs,*nidx2;
15f1af5d2fSBarry Smith   PetscBT     table;
16a3192f15SSatish Balay 
173a40ed3dSBarry Smith   PetscFunctionBegin;
18a3192f15SSatish Balay   m     = a->mbs;
19a3192f15SSatish Balay   ai    = a->i;
20a3192f15SSatish Balay   aj    = a->j;
21218c64b6SSatish Balay   bs    = a->bs;
22a3192f15SSatish Balay 
2329bbc08cSBarry Smith   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
24a3192f15SSatish Balay 
256831982aSBarry Smith   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
26b0a32e0cSBarry Smith   ierr = PetscMalloc((m+1)*sizeof(int),&nidx);CHKERRQ(ierr);
27b0a32e0cSBarry Smith   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
28a3192f15SSatish Balay 
29a3192f15SSatish Balay   for (i=0; i<is_max; i++) {
30a3192f15SSatish Balay     /* Initialise the two local arrays */
31a3192f15SSatish Balay     isz  = 0;
326831982aSBarry Smith     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
33a3192f15SSatish Balay 
34a3192f15SSatish Balay     /* Extract the indices, assume there can be duplicate entries */
35a3192f15SSatish Balay     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36b9b97703SBarry Smith     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37a3192f15SSatish Balay 
38a3192f15SSatish Balay     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
39a3192f15SSatish Balay     for (j=0; j<n ; ++j){
40218c64b6SSatish Balay       ival = idx[j]/bs; /* convert the indices into block indices */
4129bbc08cSBarry Smith       if (ival>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42f1af5d2fSBarry Smith       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
43a3192f15SSatish Balay     }
44a3192f15SSatish Balay     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
45a3192f15SSatish Balay     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
46a3192f15SSatish Balay 
47a3192f15SSatish Balay     k = 0;
48a3192f15SSatish Balay     for (j=0; j<ov; j++){ /* for each overlap*/
49a3192f15SSatish Balay       n = isz;
50a3192f15SSatish Balay       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
51a3192f15SSatish Balay         row   = nidx[k];
52a3192f15SSatish Balay         start = ai[row];
53a3192f15SSatish Balay         end   = ai[row+1];
54a3192f15SSatish Balay         for (l = start; l<end ; l++){
55a3192f15SSatish Balay           val = aj[l];
56f1af5d2fSBarry Smith           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
57a3192f15SSatish Balay         }
58a3192f15SSatish Balay       }
59a3192f15SSatish Balay     }
60218c64b6SSatish Balay     /* expand the Index Set */
61218c64b6SSatish Balay     for (j=0; j<isz; j++) {
62218c64b6SSatish Balay       for (k=0; k<bs; k++)
63218c64b6SSatish Balay         nidx2[j*bs+k] = nidx[j]*bs+k;
64218c64b6SSatish Balay     }
65329f5518SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
66a3192f15SSatish Balay   }
676831982aSBarry Smith   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
68606d414cSSatish Balay   ierr = PetscFree(nidx);CHKERRQ(ierr);
69606d414cSSatish Balay   ierr = PetscFree(nidx2);CHKERRQ(ierr);
703a40ed3dSBarry Smith   PetscFunctionReturn(0);
71a3192f15SSatish Balay }
721c351548SSatish Balay 
734a2ae208SSatish Balay #undef __FUNCT__
744a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
757b2a1423SBarry Smith int MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
76736121d4SSatish Balay {
77736121d4SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data,*c;
7840011551SBarry Smith   int          *smap,i,k,kstart,kend,ierr,oldcols = a->nbs,*lens;
79218c64b6SSatish Balay   int          row,mat_i,*mat_j,tcol,*mat_ilen;
80736121d4SSatish Balay   int          *irow,*icol,nrows,ncols,*ssmap,bs=a->bs,bs2=a->bs2;
81218c64b6SSatish Balay   int          *aj = a->j,*ai = a->i;
823f1db9ecSBarry Smith   MatScalar    *mat_a;
83736121d4SSatish Balay   Mat          C;
840f5bd95cSBarry Smith   PetscTruth   flag;
85736121d4SSatish Balay 
863a40ed3dSBarry Smith   PetscFunctionBegin;
87888f2ed8SSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
8829bbc08cSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
89736121d4SSatish Balay 
90736121d4SSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
91218c64b6SSatish Balay   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
92b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
93b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
94736121d4SSatish Balay 
95b0a32e0cSBarry Smith   ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
96736121d4SSatish Balay   ssmap = smap;
97b0a32e0cSBarry Smith   ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
98549d3d68SSatish Balay   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
99736121d4SSatish Balay   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
100736121d4SSatish Balay   /* determine lens of each row */
101736121d4SSatish Balay   for (i=0; i<nrows; i++) {
102736121d4SSatish Balay     kstart  = ai[irow[i]];
103736121d4SSatish Balay     kend    = kstart + a->ilen[irow[i]];
104736121d4SSatish Balay     lens[i] = 0;
105736121d4SSatish Balay       for (k=kstart; k<kend; k++) {
106736121d4SSatish Balay         if (ssmap[aj[k]]) {
107736121d4SSatish Balay           lens[i]++;
108736121d4SSatish Balay         }
109736121d4SSatish Balay       }
110736121d4SSatish Balay     }
111736121d4SSatish Balay   /* Create and fill new matrix */
112736121d4SSatish Balay   if (scall == MAT_REUSE_MATRIX) {
113736121d4SSatish Balay     c = (Mat_SeqBAIJ *)((*B)->data);
114736121d4SSatish Balay 
11529bbc08cSBarry Smith     if (c->mbs!=nrows || c->nbs!=ncols || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
1160f5bd95cSBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
1170f5bd95cSBarry Smith     if (flag == PETSC_FALSE) {
11829bbc08cSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
119736121d4SSatish Balay     }
120549d3d68SSatish Balay     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
121736121d4SSatish Balay     C = *B;
1223a40ed3dSBarry Smith   } else {
123736121d4SSatish Balay     ierr = MatCreateSeqBAIJ(A->comm,bs,nrows*bs,ncols*bs,0,lens,&C);CHKERRQ(ierr);
124736121d4SSatish Balay   }
125736121d4SSatish Balay   c = (Mat_SeqBAIJ *)(C->data);
126736121d4SSatish Balay   for (i=0; i<nrows; i++) {
127736121d4SSatish Balay     row    = irow[i];
128736121d4SSatish Balay     kstart = ai[row];
129736121d4SSatish Balay     kend   = kstart + a->ilen[row];
130736121d4SSatish Balay     mat_i  = c->i[i];
131736121d4SSatish Balay     mat_j  = c->j + mat_i;
132218c64b6SSatish Balay     mat_a  = c->a + mat_i*bs2;
133736121d4SSatish Balay     mat_ilen = c->ilen + i;
134736121d4SSatish Balay     for (k=kstart; k<kend; k++) {
135736121d4SSatish Balay       if ((tcol=ssmap[a->j[k]])) {
136736121d4SSatish Balay         *mat_j++ = tcol - 1;
137549d3d68SSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
138549d3d68SSatish Balay         mat_a   += bs2;
139736121d4SSatish Balay         (*mat_ilen)++;
140736121d4SSatish Balay       }
141736121d4SSatish Balay     }
142736121d4SSatish Balay   }
143218c64b6SSatish Balay 
144736121d4SSatish Balay   /* Free work space */
145736121d4SSatish Balay   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
146606d414cSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
147606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1486d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1496d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
150736121d4SSatish Balay 
151736121d4SSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
152736121d4SSatish Balay   *B = C;
1533a40ed3dSBarry Smith   PetscFunctionReturn(0);
154736121d4SSatish Balay }
155736121d4SSatish Balay 
1564a2ae208SSatish Balay #undef __FUNCT__
1574a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
1587b2a1423SBarry Smith int MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
159218c64b6SSatish Balay {
160218c64b6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
161218c64b6SSatish Balay   IS          is1,is2;
162218c64b6SSatish Balay   int         *vary,*iary,*irow,*icol,nrows,ncols,i,ierr,bs=a->bs,count;
163218c64b6SSatish Balay 
1643a40ed3dSBarry Smith   PetscFunctionBegin;
165218c64b6SSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
166218c64b6SSatish Balay   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
167b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
168b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
169218c64b6SSatish Balay 
170218c64b6SSatish Balay   /* Verify if the indices corespond to each element in a block
171218c64b6SSatish Balay    and form the IS with compressed IS */
172b0a32e0cSBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
173218c64b6SSatish Balay   iary = vary + a->mbs;
174549d3d68SSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
175218c64b6SSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
176218c64b6SSatish Balay   count = 0;
177218c64b6SSatish Balay   for (i=0; i<a->mbs; i++) {
178ac355199SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
179218c64b6SSatish Balay     if (vary[i]==bs) iary[count++] = i;
180218c64b6SSatish Balay   }
181029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
182218c64b6SSatish Balay 
183549d3d68SSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
184218c64b6SSatish Balay   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
185218c64b6SSatish Balay   count = 0;
186218c64b6SSatish Balay   for (i=0; i<a->mbs; i++) {
187ac355199SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Internal error in PETSc");
188218c64b6SSatish Balay     if (vary[i]==bs) iary[count++] = i;
189218c64b6SSatish Balay   }
190029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
191218c64b6SSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
192218c64b6SSatish Balay   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
193606d414cSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
194218c64b6SSatish Balay 
1956a6a5d1dSBarry Smith   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
196218c64b6SSatish Balay   ISDestroy(is1);
197218c64b6SSatish Balay   ISDestroy(is2);
1983a40ed3dSBarry Smith   PetscFunctionReturn(0);
199218c64b6SSatish Balay }
200218c64b6SSatish Balay 
2014a2ae208SSatish Balay #undef __FUNCT__
2024a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
203268466fbSBarry Smith int MatGetSubMatrices_SeqBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
204736121d4SSatish Balay {
205736121d4SSatish Balay   int ierr,i;
206736121d4SSatish Balay 
2073a40ed3dSBarry Smith   PetscFunctionBegin;
208736121d4SSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
20982502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
210736121d4SSatish Balay   }
211736121d4SSatish Balay 
212736121d4SSatish Balay   for (i=0; i<n; i++) {
2136a6a5d1dSBarry Smith     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
214736121d4SSatish Balay   }
2153a40ed3dSBarry Smith   PetscFunctionReturn(0);
216736121d4SSatish Balay }
217218c64b6SSatish Balay 
218218c64b6SSatish Balay 
2192d61bbb3SSatish Balay /* -------------------------------------------------------*/
2202d61bbb3SSatish Balay /* Should check that shapes of vectors and matrices match */
2212d61bbb3SSatish Balay /* -------------------------------------------------------*/
222d9eff348SSatish Balay #include "petscblaslapack.h"
2232d61bbb3SSatish Balay 
2244a2ae208SSatish Balay #undef __FUNCT__
2254a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_1"
2262d61bbb3SSatish Balay int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
2272d61bbb3SSatish Balay {
2282d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
22987828ca2SBarry Smith   PetscScalar     *x,*z,sum;
2303f1db9ecSBarry Smith   MatScalar       *v;
231e1311b90SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
2322d61bbb3SSatish Balay 
2332d61bbb3SSatish Balay   PetscFunctionBegin;
234*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
235*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
2362d61bbb3SSatish Balay 
2372d61bbb3SSatish Balay   idx   = a->j;
2382d61bbb3SSatish Balay   v     = a->a;
2392d61bbb3SSatish Balay   ii    = a->i;
2402d61bbb3SSatish Balay 
2412d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
2422d61bbb3SSatish Balay     n    = ii[1] - ii[0]; ii++;
2432d61bbb3SSatish Balay     sum  = 0.0;
2442d61bbb3SSatish Balay     while (n--) sum += *v++ * x[*idx++];
2452d61bbb3SSatish Balay     z[i] = sum;
2462d61bbb3SSatish Balay   }
247*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
248*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
249b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz - A->m);
2502d61bbb3SSatish Balay   PetscFunctionReturn(0);
2512d61bbb3SSatish Balay }
2522d61bbb3SSatish Balay 
2534a2ae208SSatish Balay #undef __FUNCT__
2544a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_2"
2552d61bbb3SSatish Balay int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
2562d61bbb3SSatish Balay {
2572d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
25887828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2;
25987828ca2SBarry Smith   PetscScalar     x1,x2;
2603f1db9ecSBarry Smith   MatScalar       *v;
261e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
2622d61bbb3SSatish Balay 
2632d61bbb3SSatish Balay   PetscFunctionBegin;
264*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
265*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
2662d61bbb3SSatish Balay 
2672d61bbb3SSatish Balay   idx   = a->j;
2682d61bbb3SSatish Balay   v     = a->a;
2692d61bbb3SSatish Balay   ii    = a->i;
2702d61bbb3SSatish Balay 
2712d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
2722d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
2732d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0;
2742d61bbb3SSatish Balay     for (j=0; j<n; j++) {
2752d61bbb3SSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
2762d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
2772d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
2782d61bbb3SSatish Balay       v += 4;
2792d61bbb3SSatish Balay     }
2802d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2;
2812d61bbb3SSatish Balay     z += 2;
2822d61bbb3SSatish Balay   }
283*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
284*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
285b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - A->m);
2862d61bbb3SSatish Balay   PetscFunctionReturn(0);
2872d61bbb3SSatish Balay }
2882d61bbb3SSatish Balay 
2894a2ae208SSatish Balay #undef __FUNCT__
2904a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_3"
2912d61bbb3SSatish Balay int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
2922d61bbb3SSatish Balay {
2932d61bbb3SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
29487828ca2SBarry Smith   PetscScalar  *x,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
2953f1db9ecSBarry Smith   MatScalar    *v;
296e1311b90SBarry Smith   int          ierr,mbs=a->mbs,i,*idx,*ii,j,n;
2972d61bbb3SSatish Balay 
298b6410449SSatish Balay #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
299fee21e36SBarry Smith #pragma disjoint(*v,*z,*xb)
300fee21e36SBarry Smith #endif
301fee21e36SBarry Smith 
3022d61bbb3SSatish Balay   PetscFunctionBegin;
303*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
304*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
3052d61bbb3SSatish Balay 
3062d61bbb3SSatish Balay   idx   = a->j;
3072d61bbb3SSatish Balay   v     = a->a;
3082d61bbb3SSatish Balay   ii    = a->i;
3092d61bbb3SSatish Balay 
3102d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
3112d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
3122d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
3132d61bbb3SSatish Balay     for (j=0; j<n; j++) {
3142d61bbb3SSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
3152d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
3162d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
3172d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
3182d61bbb3SSatish Balay       v += 9;
3192d61bbb3SSatish Balay     }
3202d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
3212d61bbb3SSatish Balay     z += 3;
3222d61bbb3SSatish Balay   }
323*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
324*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
325b0a32e0cSBarry Smith   PetscLogFlops(18*a->nz - A->m);
3262d61bbb3SSatish Balay   PetscFunctionReturn(0);
3272d61bbb3SSatish Balay }
3282d61bbb3SSatish Balay 
3294a2ae208SSatish Balay #undef __FUNCT__
3304a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_4"
3312d61bbb3SSatish Balay int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
3322d61bbb3SSatish Balay {
3332d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
33487828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
3353f1db9ecSBarry Smith   MatScalar       *v;
336e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
3372d61bbb3SSatish Balay 
3382d61bbb3SSatish Balay   PetscFunctionBegin;
339*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
340*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
3412d61bbb3SSatish Balay 
3422d61bbb3SSatish Balay   idx   = a->j;
3432d61bbb3SSatish Balay   v     = a->a;
3442d61bbb3SSatish Balay   ii    = a->i;
3452d61bbb3SSatish Balay 
3462d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
3472d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
3482d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
3492d61bbb3SSatish Balay     for (j=0; j<n; j++) {
3502d61bbb3SSatish Balay       xb = x + 4*(*idx++);
3512d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
3522d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
3532d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
3542d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
3552d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
3562d61bbb3SSatish Balay       v += 16;
3572d61bbb3SSatish Balay     }
3582d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
3592d61bbb3SSatish Balay     z += 4;
3602d61bbb3SSatish Balay   }
361*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
362*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
363b0a32e0cSBarry Smith   PetscLogFlops(32*a->nz - A->m);
3642d61bbb3SSatish Balay   PetscFunctionReturn(0);
3652d61bbb3SSatish Balay }
3662d61bbb3SSatish Balay 
3674a2ae208SSatish Balay #undef __FUNCT__
3684a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_5"
3692d61bbb3SSatish Balay int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
3702d61bbb3SSatish Balay {
3712d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
37287828ca2SBarry Smith   PetscScalar     sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z,*x;
3733f1db9ecSBarry Smith   MatScalar       *v;
374e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
3752d61bbb3SSatish Balay 
376433994e6SBarry Smith   PetscFunctionBegin;
377*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
378*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
3792d61bbb3SSatish Balay 
3802d61bbb3SSatish Balay   idx   = a->j;
3812d61bbb3SSatish Balay   v     = a->a;
3822d61bbb3SSatish Balay   ii    = a->i;
3832d61bbb3SSatish Balay 
3842d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
3852d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
3862d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
3872d61bbb3SSatish Balay     for (j=0; j<n; j++) {
3882d61bbb3SSatish Balay       xb = x + 5*(*idx++);
3892d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
3902d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
3912d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
3922d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
3932d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
3942d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
3952d61bbb3SSatish Balay       v += 25;
3962d61bbb3SSatish Balay     }
3972d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
3982d61bbb3SSatish Balay     z += 5;
3992d61bbb3SSatish Balay   }
400*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
401*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
402b0a32e0cSBarry Smith   PetscLogFlops(50*a->nz - A->m);
4032d61bbb3SSatish Balay   PetscFunctionReturn(0);
4042d61bbb3SSatish Balay }
4052d61bbb3SSatish Balay 
40615091d37SBarry Smith 
4074a2ae208SSatish Balay #undef __FUNCT__
4084a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_6"
40915091d37SBarry Smith int MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
41015091d37SBarry Smith {
41115091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
41287828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
41387828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6;
41415091d37SBarry Smith   MatScalar       *v;
41515091d37SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
41615091d37SBarry Smith 
417433994e6SBarry Smith   PetscFunctionBegin;
418*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
419*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
42015091d37SBarry Smith 
42115091d37SBarry Smith   idx   = a->j;
42215091d37SBarry Smith   v     = a->a;
42315091d37SBarry Smith   ii    = a->i;
42415091d37SBarry Smith 
42515091d37SBarry Smith   for (i=0; i<mbs; i++) {
42615091d37SBarry Smith     n  = ii[1] - ii[0]; ii++;
42715091d37SBarry Smith     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
42815091d37SBarry Smith     for (j=0; j<n; j++) {
42915091d37SBarry Smith       xb = x + 6*(*idx++);
43015091d37SBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
43115091d37SBarry Smith       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
43215091d37SBarry Smith       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
43315091d37SBarry Smith       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
43415091d37SBarry Smith       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
43515091d37SBarry Smith       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
43615091d37SBarry Smith       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
43715091d37SBarry Smith       v += 36;
43815091d37SBarry Smith     }
43915091d37SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
44015091d37SBarry Smith     z += 6;
44115091d37SBarry Smith   }
44215091d37SBarry Smith 
443*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
444*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
445b0a32e0cSBarry Smith   PetscLogFlops(72*a->nz - A->m);
44615091d37SBarry Smith   PetscFunctionReturn(0);
44715091d37SBarry Smith }
4484a2ae208SSatish Balay #undef __FUNCT__
4494a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_7"
4502d61bbb3SSatish Balay int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
4512d61bbb3SSatish Balay {
4522d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
45387828ca2SBarry Smith   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
45487828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
4553f1db9ecSBarry Smith   MatScalar       *v;
456e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
4572d61bbb3SSatish Balay 
458433994e6SBarry Smith   PetscFunctionBegin;
459*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
460*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
4612d61bbb3SSatish Balay 
4622d61bbb3SSatish Balay   idx   = a->j;
4632d61bbb3SSatish Balay   v     = a->a;
4642d61bbb3SSatish Balay   ii    = a->i;
4652d61bbb3SSatish Balay 
4662d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
4672d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
4682d61bbb3SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
4692d61bbb3SSatish Balay     for (j=0; j<n; j++) {
4702d61bbb3SSatish Balay       xb = x + 7*(*idx++);
4712d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
4722d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
4732d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
4742d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
4752d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
4762d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
4772d61bbb3SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
4782d61bbb3SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
4792d61bbb3SSatish Balay       v += 49;
4802d61bbb3SSatish Balay     }
4812d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
4822d61bbb3SSatish Balay     z += 7;
4832d61bbb3SSatish Balay   }
4842d61bbb3SSatish Balay 
485*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
486*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
487b0a32e0cSBarry Smith   PetscLogFlops(98*a->nz - A->m);
4882d61bbb3SSatish Balay   PetscFunctionReturn(0);
4892d61bbb3SSatish Balay }
4902d61bbb3SSatish Balay 
4913f1db9ecSBarry Smith /*
4923f1db9ecSBarry Smith     This will not work with MatScalar == float because it calls the BLAS
4933f1db9ecSBarry Smith */
4944a2ae208SSatish Balay #undef __FUNCT__
4954a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqBAIJ_N"
4962d61bbb3SSatish Balay int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
4972d61bbb3SSatish Balay {
4982d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
49987828ca2SBarry Smith   PetscScalar     *x,*z,*xb,*work,*workt;
5003f1db9ecSBarry Smith   MatScalar       *v;
501e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
5023f1db9ecSBarry Smith   int             ncols,k;
5032d61bbb3SSatish Balay 
5042d61bbb3SSatish Balay   PetscFunctionBegin;
505*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
506*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
5072d61bbb3SSatish Balay 
5082d61bbb3SSatish Balay   idx   = a->j;
5092d61bbb3SSatish Balay   v     = a->a;
5102d61bbb3SSatish Balay   ii    = a->i;
511218c64b6SSatish Balay 
512218c64b6SSatish Balay 
5132d61bbb3SSatish Balay   if (!a->mult_work) {
514273d9f13SBarry Smith     k    = PetscMax(A->m,A->n);
51587828ca2SBarry Smith     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
5162d61bbb3SSatish Balay   }
5172d61bbb3SSatish Balay   work = a->mult_work;
5182d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
5192d61bbb3SSatish Balay     n     = ii[1] - ii[0]; ii++;
5202d61bbb3SSatish Balay     ncols = n*bs;
5212d61bbb3SSatish Balay     workt = work;
5222d61bbb3SSatish Balay     for (j=0; j<n; j++) {
5232d61bbb3SSatish Balay       xb = x + bs*(*idx++);
5242d61bbb3SSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
5252d61bbb3SSatish Balay       workt += bs;
5262d61bbb3SSatish Balay     }
5273f1db9ecSBarry Smith     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
5283f1db9ecSBarry Smith     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
5292d61bbb3SSatish Balay     v += n*bs2;
5302d61bbb3SSatish Balay     z += bs;
5312d61bbb3SSatish Balay   }
532*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
533*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
534b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*bs2 - A->m);
5352d61bbb3SSatish Balay   PetscFunctionReturn(0);
5362d61bbb3SSatish Balay }
5372d61bbb3SSatish Balay 
5384a2ae208SSatish Balay #undef __FUNCT__
5394a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
5402d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
5412d61bbb3SSatish Balay {
5422d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
54387828ca2SBarry Smith   PetscScalar     *x,*y,*z,sum;
5443f1db9ecSBarry Smith   MatScalar       *v;
545e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,n;
5462d61bbb3SSatish Balay 
5472d61bbb3SSatish Balay   PetscFunctionBegin;
548*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
549*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
5502e8a6d31SBarry Smith   if (zz != yy) {
551*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
5522e8a6d31SBarry Smith   } else {
5532e8a6d31SBarry Smith     z = y;
5542e8a6d31SBarry Smith   }
5552d61bbb3SSatish Balay 
5562d61bbb3SSatish Balay   idx   = a->j;
5572d61bbb3SSatish Balay   v     = a->a;
5582d61bbb3SSatish Balay   ii    = a->i;
5592d61bbb3SSatish Balay 
5602d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
5612d61bbb3SSatish Balay     n    = ii[1] - ii[0]; ii++;
5622d61bbb3SSatish Balay     sum  = y[i];
5632d61bbb3SSatish Balay     while (n--) sum += *v++ * x[*idx++];
5642d61bbb3SSatish Balay     z[i] = sum;
5652d61bbb3SSatish Balay   }
566*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
567*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
5682e8a6d31SBarry Smith   if (zz != yy) {
569*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
5702e8a6d31SBarry Smith   }
571b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz);
5722d61bbb3SSatish Balay   PetscFunctionReturn(0);
5732d61bbb3SSatish Balay }
5742d61bbb3SSatish Balay 
5754a2ae208SSatish Balay #undef __FUNCT__
5764a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
5772d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
5782d61bbb3SSatish Balay {
5792d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
58087828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2;
58187828ca2SBarry Smith   PetscScalar     x1,x2;
5823f1db9ecSBarry Smith   MatScalar       *v;
583e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
5842d61bbb3SSatish Balay 
5852d61bbb3SSatish Balay   PetscFunctionBegin;
586*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
587*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
5882e8a6d31SBarry Smith   if (zz != yy) {
589*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
5902e8a6d31SBarry Smith   } else {
5912e8a6d31SBarry Smith     z = y;
5922e8a6d31SBarry Smith   }
5932d61bbb3SSatish Balay 
5942d61bbb3SSatish Balay   idx   = a->j;
5952d61bbb3SSatish Balay   v     = a->a;
5962d61bbb3SSatish Balay   ii    = a->i;
5972d61bbb3SSatish Balay 
5982d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
5992d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
6002d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1];
6012d61bbb3SSatish Balay     for (j=0; j<n; j++) {
6022d61bbb3SSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
6032d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
6042d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
6052d61bbb3SSatish Balay       v += 4;
6062d61bbb3SSatish Balay     }
6072d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2;
6082d61bbb3SSatish Balay     z += 2; y += 2;
6092d61bbb3SSatish Balay   }
610*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
611*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
6122e8a6d31SBarry Smith   if (zz != yy) {
613*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
6142e8a6d31SBarry Smith   }
615b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz);
6162d61bbb3SSatish Balay   PetscFunctionReturn(0);
6172d61bbb3SSatish Balay }
6182d61bbb3SSatish Balay 
6194a2ae208SSatish Balay #undef __FUNCT__
6204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
6212d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
6222d61bbb3SSatish Balay {
6232d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
62487828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
6253f1db9ecSBarry Smith   MatScalar       *v;
626e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
6272d61bbb3SSatish Balay 
6282d61bbb3SSatish Balay   PetscFunctionBegin;
629*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
630*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
6312e8a6d31SBarry Smith   if (zz != yy) {
632*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
6332e8a6d31SBarry Smith   } else {
6342e8a6d31SBarry Smith     z = y;
6352e8a6d31SBarry Smith   }
6362d61bbb3SSatish Balay 
6372d61bbb3SSatish Balay   idx   = a->j;
6382d61bbb3SSatish Balay   v     = a->a;
6392d61bbb3SSatish Balay   ii    = a->i;
6402d61bbb3SSatish Balay 
6412d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
6422d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
6432d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
6442d61bbb3SSatish Balay     for (j=0; j<n; j++) {
6452d61bbb3SSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
6462d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
6472d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
6482d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
6492d61bbb3SSatish Balay       v += 9;
6502d61bbb3SSatish Balay     }
6512d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
6522d61bbb3SSatish Balay     z += 3; y += 3;
6532d61bbb3SSatish Balay   }
654*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
655*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
6562e8a6d31SBarry Smith   if (zz != yy) {
657*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
6582e8a6d31SBarry Smith   }
659b0a32e0cSBarry Smith   PetscLogFlops(18*a->nz);
6602d61bbb3SSatish Balay   PetscFunctionReturn(0);
6612d61bbb3SSatish Balay }
6622d61bbb3SSatish Balay 
6634a2ae208SSatish Balay #undef __FUNCT__
6644a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
6652d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
6662d61bbb3SSatish Balay {
6672d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
66887828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
6693f1db9ecSBarry Smith   MatScalar       *v;
670e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii;
6712d61bbb3SSatish Balay   int             j,n;
6722d61bbb3SSatish Balay 
6732d61bbb3SSatish Balay   PetscFunctionBegin;
674*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
675*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
6762e8a6d31SBarry Smith   if (zz != yy) {
677*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
6782e8a6d31SBarry Smith   } else {
6792e8a6d31SBarry Smith     z = y;
6802e8a6d31SBarry Smith   }
6812d61bbb3SSatish Balay 
6822d61bbb3SSatish Balay   idx   = a->j;
6832d61bbb3SSatish Balay   v     = a->a;
6842d61bbb3SSatish Balay   ii    = a->i;
6852d61bbb3SSatish Balay 
6862d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
6872d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
6882d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
6892d61bbb3SSatish Balay     for (j=0; j<n; j++) {
6902d61bbb3SSatish Balay       xb = x + 4*(*idx++);
6912d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
6922d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
6932d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
6942d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
6952d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
6962d61bbb3SSatish Balay       v += 16;
6972d61bbb3SSatish Balay     }
6982d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
6992d61bbb3SSatish Balay     z += 4; y += 4;
7002d61bbb3SSatish Balay   }
701*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
702*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
7032e8a6d31SBarry Smith   if (zz != yy) {
704*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
7052e8a6d31SBarry Smith   }
706b0a32e0cSBarry Smith   PetscLogFlops(32*a->nz);
7072d61bbb3SSatish Balay   PetscFunctionReturn(0);
7082d61bbb3SSatish Balay }
7092d61bbb3SSatish Balay 
7104a2ae208SSatish Balay #undef __FUNCT__
7114a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
7122d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
7132d61bbb3SSatish Balay {
7142d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
71587828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
7163f1db9ecSBarry Smith   MatScalar       *v;
717e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
7182d61bbb3SSatish Balay 
7192d61bbb3SSatish Balay   PetscFunctionBegin;
720*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
721*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
7222e8a6d31SBarry Smith   if (zz != yy) {
723*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
7242e8a6d31SBarry Smith   } else {
7252e8a6d31SBarry Smith     z = y;
7262e8a6d31SBarry Smith   }
7272d61bbb3SSatish Balay 
7282d61bbb3SSatish Balay   idx   = a->j;
7292d61bbb3SSatish Balay   v     = a->a;
7302d61bbb3SSatish Balay   ii    = a->i;
7312d61bbb3SSatish Balay 
7322d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
7332d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
7342d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
7352d61bbb3SSatish Balay     for (j=0; j<n; j++) {
7362d61bbb3SSatish Balay       xb = x + 5*(*idx++);
7372d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
7382d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
7392d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
7402d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
7412d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
7422d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
7432d61bbb3SSatish Balay       v += 25;
7442d61bbb3SSatish Balay     }
7452d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
7462d61bbb3SSatish Balay     z += 5; y += 5;
7472d61bbb3SSatish Balay   }
748*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
749*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
7502e8a6d31SBarry Smith   if (zz != yy) {
751*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
7522e8a6d31SBarry Smith   }
753b0a32e0cSBarry Smith   PetscLogFlops(50*a->nz);
7542d61bbb3SSatish Balay   PetscFunctionReturn(0);
7552d61bbb3SSatish Balay }
7564a2ae208SSatish Balay #undef __FUNCT__
7574a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
75815091d37SBarry Smith int MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
75915091d37SBarry Smith {
76015091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
76187828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
76287828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6;
76315091d37SBarry Smith   MatScalar       *v;
76415091d37SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
76515091d37SBarry Smith 
76615091d37SBarry Smith   PetscFunctionBegin;
767*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
768*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
76915091d37SBarry Smith   if (zz != yy) {
770*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
77115091d37SBarry Smith   } else {
77215091d37SBarry Smith     z = y;
77315091d37SBarry Smith   }
77415091d37SBarry Smith 
77515091d37SBarry Smith   idx   = a->j;
77615091d37SBarry Smith   v     = a->a;
77715091d37SBarry Smith   ii    = a->i;
77815091d37SBarry Smith 
77915091d37SBarry Smith   for (i=0; i<mbs; i++) {
78015091d37SBarry Smith     n  = ii[1] - ii[0]; ii++;
78115091d37SBarry Smith     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
78215091d37SBarry Smith     for (j=0; j<n; j++) {
7833b95cb0eSSatish Balay       xb = x + 6*(*idx++);
78415091d37SBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
78515091d37SBarry Smith       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
78615091d37SBarry Smith       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
78715091d37SBarry Smith       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
78815091d37SBarry Smith       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
78915091d37SBarry Smith       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
79015091d37SBarry Smith       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
79115091d37SBarry Smith       v += 36;
79215091d37SBarry Smith     }
79315091d37SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
79415091d37SBarry Smith     z += 6; y += 6;
79515091d37SBarry Smith   }
796*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
797*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
79815091d37SBarry Smith   if (zz != yy) {
799*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
80015091d37SBarry Smith   }
801b0a32e0cSBarry Smith   PetscLogFlops(72*a->nz);
80215091d37SBarry Smith   PetscFunctionReturn(0);
80315091d37SBarry Smith }
8042d61bbb3SSatish Balay 
8054a2ae208SSatish Balay #undef __FUNCT__
8064a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
8072d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
8082d61bbb3SSatish Balay {
8092d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
81087828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
81187828ca2SBarry Smith   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
8123f1db9ecSBarry Smith   MatScalar       *v;
813e1311b90SBarry Smith   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
8142d61bbb3SSatish Balay 
8152d61bbb3SSatish Balay   PetscFunctionBegin;
816*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
817*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
8182e8a6d31SBarry Smith   if (zz != yy) {
819*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
8202e8a6d31SBarry Smith   } else {
8212e8a6d31SBarry Smith     z = y;
8222e8a6d31SBarry Smith   }
8232d61bbb3SSatish Balay 
8242d61bbb3SSatish Balay   idx   = a->j;
8252d61bbb3SSatish Balay   v     = a->a;
8262d61bbb3SSatish Balay   ii    = a->i;
8272d61bbb3SSatish Balay 
8282d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
8292d61bbb3SSatish Balay     n  = ii[1] - ii[0]; ii++;
8302d61bbb3SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
8312d61bbb3SSatish Balay     for (j=0; j<n; j++) {
8322d61bbb3SSatish Balay       xb = x + 7*(*idx++);
8332d61bbb3SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
8342d61bbb3SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
8352d61bbb3SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
8362d61bbb3SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
8372d61bbb3SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
8382d61bbb3SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
8392d61bbb3SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
8402d61bbb3SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
8412d61bbb3SSatish Balay       v += 49;
8422d61bbb3SSatish Balay     }
8432d61bbb3SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
8442d61bbb3SSatish Balay     z += 7; y += 7;
8452d61bbb3SSatish Balay   }
846*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
847*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
8482e8a6d31SBarry Smith   if (zz != yy) {
849*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
8502e8a6d31SBarry Smith   }
851b0a32e0cSBarry Smith   PetscLogFlops(98*a->nz);
8522d61bbb3SSatish Balay   PetscFunctionReturn(0);
8532d61bbb3SSatish Balay }
854218c64b6SSatish Balay 
8554a2ae208SSatish Balay #undef __FUNCT__
8564a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
8572d61bbb3SSatish Balay int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
8582d61bbb3SSatish Balay {
8592d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
860b2e7d493SHong Zhang   PetscScalar    *x,*z,*xb,*work,*workt,*y;
8613f1db9ecSBarry Smith   MatScalar      *v;
8622d61bbb3SSatish Balay   int            mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
8633f1db9ecSBarry Smith   int            ncols,k;
864218c64b6SSatish Balay 
8652d61bbb3SSatish Balay   PetscFunctionBegin;
866*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
867*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
868b2e7d493SHong Zhang   if (zz != yy) {
869b2e7d493SHong Zhang     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
870b2e7d493SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
871b2e7d493SHong Zhang     ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
872b2e7d493SHong Zhang   }
8732d61bbb3SSatish Balay 
8742d61bbb3SSatish Balay   idx   = a->j;
8752d61bbb3SSatish Balay   v     = a->a;
8762d61bbb3SSatish Balay   ii    = a->i;
8772d61bbb3SSatish Balay 
8782d61bbb3SSatish Balay 
8792d61bbb3SSatish Balay   if (!a->mult_work) {
880273d9f13SBarry Smith     k    = PetscMax(A->m,A->n);
88187828ca2SBarry Smith     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
8822d61bbb3SSatish Balay   }
8832d61bbb3SSatish Balay   work = a->mult_work;
8842d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
8852d61bbb3SSatish Balay     n     = ii[1] - ii[0]; ii++;
8862d61bbb3SSatish Balay     ncols = n*bs;
8872d61bbb3SSatish Balay     workt = work;
8882d61bbb3SSatish Balay     for (j=0; j<n; j++) {
8892d61bbb3SSatish Balay       xb = x + bs*(*idx++);
8902d61bbb3SSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
8912d61bbb3SSatish Balay       workt += bs;
8922d61bbb3SSatish Balay     }
8933f1db9ecSBarry Smith     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
8943f1db9ecSBarry Smith     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
8952d61bbb3SSatish Balay     v += n*bs2;
8962d61bbb3SSatish Balay     z += bs;
8972d61bbb3SSatish Balay   }
898*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
899*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
900b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*bs2);
9012d61bbb3SSatish Balay   PetscFunctionReturn(0);
9022d61bbb3SSatish Balay }
9032d61bbb3SSatish Balay 
9044a2ae208SSatish Balay #undef __FUNCT__
9054a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
9067c922b88SBarry Smith int MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
9072d61bbb3SSatish Balay {
9082d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
90987828ca2SBarry Smith   PetscScalar     *xg,*zg,*zb,zero = 0.0;
91087828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
9113f1db9ecSBarry Smith   MatScalar       *v;
912f1af5d2fSBarry Smith   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
9132d61bbb3SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
9142d61bbb3SSatish Balay 
9152d61bbb3SSatish Balay   PetscFunctionBegin;
916f1af5d2fSBarry Smith   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
917*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&xg);CHKERRQ(ierr); x = xg;
918*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&zg);CHKERRQ(ierr); z = zg;
9192d61bbb3SSatish Balay 
9202d61bbb3SSatish Balay   idx   = a->j;
9212d61bbb3SSatish Balay   v     = a->a;
9222d61bbb3SSatish Balay   ii    = a->i;
923f1af5d2fSBarry Smith   xb    = x;
9242d61bbb3SSatish Balay   switch (bs) {
9252d61bbb3SSatish Balay   case 1:
9262d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9272d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
928f1af5d2fSBarry Smith       x1 = xb[0];
9292d61bbb3SSatish Balay       ib = idx + ai[i];
9302d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9312d61bbb3SSatish Balay         rval    = ib[j];
932f1af5d2fSBarry Smith         z[rval] += *v * x1;
933f1af5d2fSBarry Smith         v++;
9342d61bbb3SSatish Balay       }
935f1af5d2fSBarry Smith       xb++;
9362d61bbb3SSatish Balay     }
9372d61bbb3SSatish Balay     break;
9382d61bbb3SSatish Balay   case 2:
9392d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9402d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
941f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1];
9422d61bbb3SSatish Balay       ib = idx + ai[i];
9432d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9442d61bbb3SSatish Balay         rval      = ib[j]*2;
9452d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
946f1af5d2fSBarry Smith         z[rval]   += v[2]*x1 + v[3]*x2;
9472d61bbb3SSatish Balay         v  += 4;
9482d61bbb3SSatish Balay       }
949f1af5d2fSBarry Smith       xb += 2;
9502d61bbb3SSatish Balay     }
9512d61bbb3SSatish Balay     break;
9522d61bbb3SSatish Balay   case 3:
9532d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9542d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
955f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
9562d61bbb3SSatish Balay       ib = idx + ai[i];
9572d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9582d61bbb3SSatish Balay         rval      = ib[j]*3;
9592d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
9602d61bbb3SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
961f1af5d2fSBarry Smith         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
9622d61bbb3SSatish Balay         v  += 9;
9632d61bbb3SSatish Balay       }
964f1af5d2fSBarry Smith       xb += 3;
9652d61bbb3SSatish Balay     }
9662d61bbb3SSatish Balay     break;
9672d61bbb3SSatish Balay   case 4:
9682d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9692d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
970f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
9712d61bbb3SSatish Balay       ib = idx + ai[i];
9722d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9732d61bbb3SSatish Balay         rval      = ib[j]*4;
9742d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
9752d61bbb3SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
9762d61bbb3SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
977f1af5d2fSBarry Smith         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
9782d61bbb3SSatish Balay         v  += 16;
9792d61bbb3SSatish Balay       }
980f1af5d2fSBarry Smith       xb += 4;
9812d61bbb3SSatish Balay     }
9822d61bbb3SSatish Balay     break;
9832d61bbb3SSatish Balay   case 5:
9842d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
9852d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
986f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
9872d61bbb3SSatish Balay       x4 = xb[3]; x5 = xb[4];
9882d61bbb3SSatish Balay       ib = idx + ai[i];
9892d61bbb3SSatish Balay       for (j=0; j<n; j++) {
9902d61bbb3SSatish Balay         rval      = ib[j]*5;
9912d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
9922d61bbb3SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
9932d61bbb3SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
9942d61bbb3SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
995f1af5d2fSBarry Smith         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
9962d61bbb3SSatish Balay         v  += 25;
9972d61bbb3SSatish Balay       }
998f1af5d2fSBarry Smith       xb += 5;
9992d61bbb3SSatish Balay     }
10002d61bbb3SSatish Balay     break;
1001f1af5d2fSBarry Smith   case 6:
1002f1af5d2fSBarry Smith     for (i=0; i<mbs; i++) {
1003f1af5d2fSBarry Smith       n  = ii[1] - ii[0]; ii++;
1004f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1005f1af5d2fSBarry Smith       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1006f1af5d2fSBarry Smith       ib = idx + ai[i];
1007f1af5d2fSBarry Smith       for (j=0; j<n; j++) {
1008f1af5d2fSBarry Smith         rval      = ib[j]*6;
1009f1af5d2fSBarry Smith         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
1010f1af5d2fSBarry Smith         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
1011f1af5d2fSBarry Smith         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
1012f1af5d2fSBarry Smith         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
1013f1af5d2fSBarry Smith         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
1014f1af5d2fSBarry Smith         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
1015f1af5d2fSBarry Smith         v  += 36;
1016f1af5d2fSBarry Smith       }
1017f1af5d2fSBarry Smith       xb += 6;
1018f1af5d2fSBarry Smith     }
1019f1af5d2fSBarry Smith     break;
1020f1af5d2fSBarry Smith   case 7:
1021f1af5d2fSBarry Smith     for (i=0; i<mbs; i++) {
1022f1af5d2fSBarry Smith       n  = ii[1] - ii[0]; ii++;
1023f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1024f1af5d2fSBarry Smith       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1025f1af5d2fSBarry Smith       ib = idx + ai[i];
1026f1af5d2fSBarry Smith       for (j=0; j<n; j++) {
1027f1af5d2fSBarry Smith         rval      = ib[j]*7;
1028f1af5d2fSBarry Smith         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
1029f1af5d2fSBarry Smith         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1030f1af5d2fSBarry Smith         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1031f1af5d2fSBarry Smith         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1032f1af5d2fSBarry Smith         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1033f1af5d2fSBarry Smith         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1034f1af5d2fSBarry Smith         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1035f1af5d2fSBarry Smith         v  += 49;
1036f1af5d2fSBarry Smith       }
1037f1af5d2fSBarry Smith       xb += 7;
1038f1af5d2fSBarry Smith     }
1039f1af5d2fSBarry Smith     break;
1040f1af5d2fSBarry Smith   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
10413f1db9ecSBarry Smith       int          ncols,k;
104287828ca2SBarry Smith       PetscScalar  *work,*workt;
10433f1db9ecSBarry Smith 
10442d61bbb3SSatish Balay       if (!a->mult_work) {
1045273d9f13SBarry Smith         k = PetscMax(A->m,A->n);
104687828ca2SBarry Smith         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
10472d61bbb3SSatish Balay       }
10482d61bbb3SSatish Balay       work = a->mult_work;
10492d61bbb3SSatish Balay       for (i=0; i<mbs; i++) {
10502d61bbb3SSatish Balay         n     = ii[1] - ii[0]; ii++;
10512d61bbb3SSatish Balay         ncols = n*bs;
105287828ca2SBarry Smith         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1053d824769bSBarry Smith         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
10543f1db9ecSBarry Smith         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
10552d61bbb3SSatish Balay         v += n*bs2;
10562d61bbb3SSatish Balay         x += bs;
10572d61bbb3SSatish Balay         workt = work;
10582d61bbb3SSatish Balay         for (j=0; j<n; j++) {
10592d61bbb3SSatish Balay           zb = z + bs*(*idx++);
10602d61bbb3SSatish Balay           for (k=0; k<bs; k++) zb[k] += workt[k] ;
10612d61bbb3SSatish Balay           workt += bs;
10622d61bbb3SSatish Balay         }
10632d61bbb3SSatish Balay       }
10642d61bbb3SSatish Balay     }
10652d61bbb3SSatish Balay   }
1066*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&xg);CHKERRQ(ierr);
1067*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&zg);CHKERRQ(ierr);
1068b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*a->bs2 - A->n);
10692d61bbb3SSatish Balay   PetscFunctionReturn(0);
10702d61bbb3SSatish Balay }
10712d61bbb3SSatish Balay 
10724a2ae208SSatish Balay #undef __FUNCT__
10734a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
10747c922b88SBarry Smith int MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
10752d61bbb3SSatish Balay 
10762d61bbb3SSatish Balay {
10772d61bbb3SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
107887828ca2SBarry Smith   PetscScalar     *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
10793f1db9ecSBarry Smith   MatScalar       *v;
1080f1af5d2fSBarry Smith   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
10812d61bbb3SSatish Balay 
10822d61bbb3SSatish Balay   PetscFunctionBegin;
1083ef66eb69SBarry Smith   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1084*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&xg);CHKERRQ(ierr); x = xg;
1085*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&zg);CHKERRQ(ierr); z = zg;
10862d61bbb3SSatish Balay 
10872d61bbb3SSatish Balay 
10882d61bbb3SSatish Balay   idx   = a->j;
10892d61bbb3SSatish Balay   v     = a->a;
10902d61bbb3SSatish Balay   ii    = a->i;
1091f1af5d2fSBarry Smith   xb    = x;
10922d61bbb3SSatish Balay 
10932d61bbb3SSatish Balay   switch (bs) {
10942d61bbb3SSatish Balay   case 1:
10952d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
10962d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1097f1af5d2fSBarry Smith       x1 = xb[0];
10982d61bbb3SSatish Balay       ib = idx + ai[i];
10992d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11002d61bbb3SSatish Balay         rval    = ib[j];
1101f1af5d2fSBarry Smith         z[rval] += *v * x1;
1102f1af5d2fSBarry Smith         v++;
11032d61bbb3SSatish Balay       }
1104f1af5d2fSBarry Smith       xb++;
11052d61bbb3SSatish Balay     }
11062d61bbb3SSatish Balay     break;
11072d61bbb3SSatish Balay   case 2:
11082d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11092d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1110f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1];
11112d61bbb3SSatish Balay       ib = idx + ai[i];
11122d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11132d61bbb3SSatish Balay         rval      = ib[j]*2;
11142d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
11152d61bbb3SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
11162d61bbb3SSatish Balay         v  += 4;
11172d61bbb3SSatish Balay       }
1118f1af5d2fSBarry Smith       xb += 2;
11192d61bbb3SSatish Balay     }
11202d61bbb3SSatish Balay     break;
11212d61bbb3SSatish Balay   case 3:
11222d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11232d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1124f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
11252d61bbb3SSatish Balay       ib = idx + ai[i];
11262d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11272d61bbb3SSatish Balay         rval      = ib[j]*3;
11282d61bbb3SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
11292d61bbb3SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
11302d61bbb3SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
11312d61bbb3SSatish Balay         v  += 9;
11322d61bbb3SSatish Balay       }
1133f1af5d2fSBarry Smith       xb += 3;
11342d61bbb3SSatish Balay     }
11352d61bbb3SSatish Balay     break;
11362d61bbb3SSatish Balay   case 4:
11372d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11382d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1139f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
11402d61bbb3SSatish Balay       ib = idx + ai[i];
11412d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11422d61bbb3SSatish Balay         rval      = ib[j]*4;
11432d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
11442d61bbb3SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
11452d61bbb3SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
11462d61bbb3SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
11472d61bbb3SSatish Balay         v  += 16;
11482d61bbb3SSatish Balay       }
1149f1af5d2fSBarry Smith       xb += 4;
11502d61bbb3SSatish Balay     }
11512d61bbb3SSatish Balay     break;
11522d61bbb3SSatish Balay   case 5:
11532d61bbb3SSatish Balay     for (i=0; i<mbs; i++) {
11542d61bbb3SSatish Balay       n  = ii[1] - ii[0]; ii++;
1155f1af5d2fSBarry Smith       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
11562d61bbb3SSatish Balay       x4 = xb[3]; x5 = xb[4];
11572d61bbb3SSatish Balay       ib = idx + ai[i];
11582d61bbb3SSatish Balay       for (j=0; j<n; j++) {
11592d61bbb3SSatish Balay         rval      = ib[j]*5;
11602d61bbb3SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
11612d61bbb3SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
11622d61bbb3SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
11632d61bbb3SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
11642d61bbb3SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
11652d61bbb3SSatish Balay         v  += 25;
11662d61bbb3SSatish Balay       }
1167f1af5d2fSBarry Smith       xb += 5;
11682d61bbb3SSatish Balay     }
11692d61bbb3SSatish Balay     break;
1170f1af5d2fSBarry Smith   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
11713f1db9ecSBarry Smith       int          ncols,k;
117287828ca2SBarry Smith       PetscScalar  *work,*workt;
11733f1db9ecSBarry Smith 
11742d61bbb3SSatish Balay       if (!a->mult_work) {
1175273d9f13SBarry Smith         k = PetscMax(A->m,A->n);
117687828ca2SBarry Smith         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
11772d61bbb3SSatish Balay       }
11782d61bbb3SSatish Balay       work = a->mult_work;
11792d61bbb3SSatish Balay       for (i=0; i<mbs; i++) {
11802d61bbb3SSatish Balay         n     = ii[1] - ii[0]; ii++;
11812d61bbb3SSatish Balay         ncols = n*bs;
118287828ca2SBarry Smith         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
11833f1db9ecSBarry Smith         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
11843f1db9ecSBarry Smith         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
11852d61bbb3SSatish Balay         v += n*bs2;
11862d61bbb3SSatish Balay         x += bs;
11872d61bbb3SSatish Balay         workt = work;
11882d61bbb3SSatish Balay         for (j=0; j<n; j++) {
11892d61bbb3SSatish Balay           zb = z + bs*(*idx++);
11902d61bbb3SSatish Balay           for (k=0; k<bs; k++) zb[k] += workt[k] ;
11912d61bbb3SSatish Balay           workt += bs;
11922d61bbb3SSatish Balay         }
11932d61bbb3SSatish Balay       }
11942d61bbb3SSatish Balay     }
11952d61bbb3SSatish Balay   }
1196*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&xg);CHKERRQ(ierr);
1197*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&zg);CHKERRQ(ierr);
1198b0a32e0cSBarry Smith   PetscLogFlops(2*a->nz*a->bs2);
11992d61bbb3SSatish Balay   PetscFunctionReturn(0);
12002d61bbb3SSatish Balay }
12012d61bbb3SSatish Balay 
12024a2ae208SSatish Balay #undef __FUNCT__
12034a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqBAIJ"
1204268466fbSBarry Smith int MatScale_SeqBAIJ(const PetscScalar *alpha,Mat inA)
12052d61bbb3SSatish Balay {
12062d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
120787052302SDinesh Kaushik   int         totalnz = a->bs2*a->nz;
12083eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12093eda8832SBarry Smith   int         i;
121087052302SDinesh Kaushik #else
121187052302SDinesh Kaushik   int         one = 1;
12123eda8832SBarry Smith #endif
12132d61bbb3SSatish Balay 
12142d61bbb3SSatish Balay   PetscFunctionBegin;
12153eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12163eda8832SBarry Smith   for (i=0; i<totalnz; i++) a->a[i] *= *alpha;
12173eda8832SBarry Smith #else
1218268466fbSBarry Smith   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
12193eda8832SBarry Smith #endif
1220b0a32e0cSBarry Smith   PetscLogFlops(totalnz);
12212d61bbb3SSatish Balay   PetscFunctionReturn(0);
12222d61bbb3SSatish Balay }
12232d61bbb3SSatish Balay 
12244a2ae208SSatish Balay #undef __FUNCT__
12254a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqBAIJ"
1226329f5518SBarry Smith int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
12272d61bbb3SSatish Balay {
12282d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
12293f1db9ecSBarry Smith   MatScalar   *v = a->a;
1230329f5518SBarry Smith   PetscReal   sum = 0.0;
12310e90e235SBarry Smith   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
12322d61bbb3SSatish Balay 
12332d61bbb3SSatish Balay   PetscFunctionBegin;
12342d61bbb3SSatish Balay   if (type == NORM_FROBENIUS) {
12352d61bbb3SSatish Balay     for (i=0; i< bs2*nz; i++) {
1236aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
1237329f5518SBarry Smith       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
12382d61bbb3SSatish Balay #else
12392d61bbb3SSatish Balay       sum += (*v)*(*v); v++;
12402d61bbb3SSatish Balay #endif
12412d61bbb3SSatish Balay     }
12422d61bbb3SSatish Balay     *norm = sqrt(sum);
1243596552b5SBarry Smith   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1244596552b5SBarry Smith     *norm = 0.0;
1245596552b5SBarry Smith     for (k=0; k<bs; k++) {
124674f84c7bSSatish Balay       for (j=0; j<a->mbs; j++) {
1247596552b5SBarry Smith         v = a->a + bs2*a->i[j] + k;
1248596552b5SBarry Smith         sum = 0.0;
1249596552b5SBarry Smith         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
12500e90e235SBarry Smith           for (k1=0; k1<bs; k1++){
1251596552b5SBarry Smith             sum += PetscAbsScalar(*v);
1252596552b5SBarry Smith             v   += bs;
12532d61bbb3SSatish Balay           }
12540e90e235SBarry Smith         }
1255596552b5SBarry Smith         if (sum > *norm) *norm = sum;
1256596552b5SBarry Smith       }
1257596552b5SBarry Smith     }
1258596552b5SBarry Smith   } else {
125929bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
12602d61bbb3SSatish Balay   }
12612d61bbb3SSatish Balay   PetscFunctionReturn(0);
12622d61bbb3SSatish Balay }
12632d61bbb3SSatish Balay 
12642d61bbb3SSatish Balay 
12654a2ae208SSatish Balay #undef __FUNCT__
12664a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqBAIJ"
12672d61bbb3SSatish Balay int MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
12682d61bbb3SSatish Balay {
12692d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
12700f5bd95cSBarry Smith   int         ierr;
12712d61bbb3SSatish Balay 
12722d61bbb3SSatish Balay   PetscFunctionBegin;
12732d61bbb3SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1274273d9f13SBarry Smith   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1275273d9f13SBarry Smith     *flg = PETSC_FALSE;
1276273d9f13SBarry Smith     PetscFunctionReturn(0);
12772d61bbb3SSatish Balay   }
12782d61bbb3SSatish Balay 
12792d61bbb3SSatish Balay   /* if the a->i are the same */
12800f5bd95cSBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
12810f5bd95cSBarry Smith   if (*flg == PETSC_FALSE) {
12820f5bd95cSBarry Smith     PetscFunctionReturn(0);
12832d61bbb3SSatish Balay   }
12842d61bbb3SSatish Balay 
12852d61bbb3SSatish Balay   /* if a->j are the same */
12860f5bd95cSBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
12870f5bd95cSBarry Smith   if (*flg == PETSC_FALSE) {
12880f5bd95cSBarry Smith     PetscFunctionReturn(0);
12892d61bbb3SSatish Balay   }
12902d61bbb3SSatish Balay   /* if a->a are the same */
129187828ca2SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
12922d61bbb3SSatish Balay   PetscFunctionReturn(0);
12932d61bbb3SSatish Balay 
12942d61bbb3SSatish Balay }
12952d61bbb3SSatish Balay 
12964a2ae208SSatish Balay #undef __FUNCT__
12974a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
12982d61bbb3SSatish Balay int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
12992d61bbb3SSatish Balay {
13002d61bbb3SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1301e1311b90SBarry Smith   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
130287828ca2SBarry Smith   PetscScalar  *x,zero = 0.0;
13033f1db9ecSBarry Smith   MatScalar    *aa,*aa_j;
13042d61bbb3SSatish Balay 
13052d61bbb3SSatish Balay   PetscFunctionBegin;
130629bbc08cSBarry Smith   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
13072d61bbb3SSatish Balay   bs   = a->bs;
13082d61bbb3SSatish Balay   aa   = a->a;
13092d61bbb3SSatish Balay   ai   = a->i;
13102d61bbb3SSatish Balay   aj   = a->j;
13112d61bbb3SSatish Balay   ambs = a->mbs;
13122d61bbb3SSatish Balay   bs2  = a->bs2;
13132d61bbb3SSatish Balay 
1314e1311b90SBarry Smith   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1315*b1d4fb26SBarry Smith   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
1316e1311b90SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1317273d9f13SBarry Smith   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
13182d61bbb3SSatish Balay   for (i=0; i<ambs; i++) {
13192d61bbb3SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
13202d61bbb3SSatish Balay       if (aj[j] == i) {
13212d61bbb3SSatish Balay         row  = i*bs;
13222d61bbb3SSatish Balay         aa_j = aa+j*bs2;
13232d61bbb3SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
13242d61bbb3SSatish Balay         break;
13252d61bbb3SSatish Balay       }
13262d61bbb3SSatish Balay     }
13272d61bbb3SSatish Balay   }
1328*b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
13292d61bbb3SSatish Balay   PetscFunctionReturn(0);
13302d61bbb3SSatish Balay }
13312d61bbb3SSatish Balay 
13324a2ae208SSatish Balay #undef __FUNCT__
13334a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
13342d61bbb3SSatish Balay int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
13352d61bbb3SSatish Balay {
13362d61bbb3SSatish Balay   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
133787828ca2SBarry Smith   PetscScalar  *l,*r,x,*li,*ri;
13383f1db9ecSBarry Smith   MatScalar    *aa,*v;
1339e1311b90SBarry Smith   int          ierr,i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
13402d61bbb3SSatish Balay 
13412d61bbb3SSatish Balay   PetscFunctionBegin;
13422d61bbb3SSatish Balay   ai  = a->i;
13432d61bbb3SSatish Balay   aj  = a->j;
13442d61bbb3SSatish Balay   aa  = a->a;
1345273d9f13SBarry Smith   m   = A->m;
1346273d9f13SBarry Smith   n   = A->n;
13472d61bbb3SSatish Balay   bs  = a->bs;
13482d61bbb3SSatish Balay   mbs = a->mbs;
13492d61bbb3SSatish Balay   bs2 = a->bs2;
13502d61bbb3SSatish Balay   if (ll) {
1351*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr);
1352e1311b90SBarry Smith     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
135329bbc08cSBarry Smith     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
13542d61bbb3SSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
13552d61bbb3SSatish Balay       M  = ai[i+1] - ai[i];
13562d61bbb3SSatish Balay       li = l + i*bs;
13572d61bbb3SSatish Balay       v  = aa + bs2*ai[i];
13582d61bbb3SSatish Balay       for (j=0; j<M; j++) { /* for each block */
13592d61bbb3SSatish Balay         for (k=0; k<bs2; k++) {
13602d61bbb3SSatish Balay           (*v++) *= li[k%bs];
13612d61bbb3SSatish Balay         }
13622d61bbb3SSatish Balay       }
13632d61bbb3SSatish Balay     }
1364*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr);
1365b0a32e0cSBarry Smith     PetscLogFlops(a->nz);
13662d61bbb3SSatish Balay   }
13672d61bbb3SSatish Balay 
13682d61bbb3SSatish Balay   if (rr) {
1369*b1d4fb26SBarry Smith     ierr = VecGetArrayFast(rr,&r);CHKERRQ(ierr);
1370e1311b90SBarry Smith     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
137129bbc08cSBarry Smith     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
13722d61bbb3SSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
13732d61bbb3SSatish Balay       M  = ai[i+1] - ai[i];
13742d61bbb3SSatish Balay       v  = aa + bs2*ai[i];
13752d61bbb3SSatish Balay       for (j=0; j<M; j++) { /* for each block */
13762d61bbb3SSatish Balay         ri = r + bs*aj[ai[i]+j];
13772d61bbb3SSatish Balay         for (k=0; k<bs; k++) {
13782d61bbb3SSatish Balay           x = ri[k];
13792d61bbb3SSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
13802d61bbb3SSatish Balay         }
13812d61bbb3SSatish Balay       }
13822d61bbb3SSatish Balay     }
1383*b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(rr,&r);CHKERRQ(ierr);
1384b0a32e0cSBarry Smith     PetscLogFlops(a->nz);
13852d61bbb3SSatish Balay   }
13862d61bbb3SSatish Balay   PetscFunctionReturn(0);
13872d61bbb3SSatish Balay }
13882d61bbb3SSatish Balay 
13892d61bbb3SSatish Balay 
13904a2ae208SSatish Balay #undef __FUNCT__
13914a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqBAIJ"
13922d61bbb3SSatish Balay int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
13932d61bbb3SSatish Balay {
13942d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
13952d61bbb3SSatish Balay 
13962d61bbb3SSatish Balay   PetscFunctionBegin;
1397273d9f13SBarry Smith   info->rows_global    = (double)A->m;
1398273d9f13SBarry Smith   info->columns_global = (double)A->n;
1399273d9f13SBarry Smith   info->rows_local     = (double)A->m;
1400273d9f13SBarry Smith   info->columns_local  = (double)A->n;
14012d61bbb3SSatish Balay   info->block_size     = a->bs2;
14022d61bbb3SSatish Balay   info->nz_allocated   = a->maxnz;
14032d61bbb3SSatish Balay   info->nz_used        = a->bs2*a->nz;
14042d61bbb3SSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
14052d61bbb3SSatish Balay   info->assemblies   = A->num_ass;
14062d61bbb3SSatish Balay   info->mallocs      = a->reallocs;
14072d61bbb3SSatish Balay   info->memory       = A->mem;
14082d61bbb3SSatish Balay   if (A->factor) {
14092d61bbb3SSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
14102d61bbb3SSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
14112d61bbb3SSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
14122d61bbb3SSatish Balay   } else {
14132d61bbb3SSatish Balay     info->fill_ratio_given  = 0;
14142d61bbb3SSatish Balay     info->fill_ratio_needed = 0;
14152d61bbb3SSatish Balay     info->factor_mallocs    = 0;
14162d61bbb3SSatish Balay   }
14172d61bbb3SSatish Balay   PetscFunctionReturn(0);
14182d61bbb3SSatish Balay }
14192d61bbb3SSatish Balay 
14202d61bbb3SSatish Balay 
14214a2ae208SSatish Balay #undef __FUNCT__
14224a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
14232d61bbb3SSatish Balay int MatZeroEntries_SeqBAIJ(Mat A)
14242d61bbb3SSatish Balay {
14252d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1426549d3d68SSatish Balay   int         ierr;
14272d61bbb3SSatish Balay 
14282d61bbb3SSatish Balay   PetscFunctionBegin;
1429549d3d68SSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
14302d61bbb3SSatish Balay   PetscFunctionReturn(0);
14312d61bbb3SSatish Balay }
1432