xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision d9eff348d02e27cdd2d39dde40019e558f27352f)
1*d9eff348SSatish Balay /*$Id: sbaij2.c,v 1.5 2000/08/13 15:03:03 bsmith Exp balay $*/
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "petscsys.h"
449b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
549b5e25fSSatish Balay #include "src/vec/vecimpl.h"
649b5e25fSSatish Balay #include "src/inline/spops.h"
749b5e25fSSatish Balay #include "src/inline/ilu.h"
849b5e25fSSatish Balay #include "petscbt.h"
949b5e25fSSatish Balay #include "sbaij.h"
1049b5e25fSSatish Balay 
1149b5e25fSSatish Balay #undef __FUNC__
1249b5e25fSSatish Balay #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ"
1349b5e25fSSatish Balay int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov)
1449b5e25fSSatish Balay {
1549b5e25fSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1649b5e25fSSatish Balay   int         row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival;
1749b5e25fSSatish Balay   int         start,end,*ai,*aj,bs,*nidx2;
1849b5e25fSSatish Balay   PetscBT     table;
1949b5e25fSSatish Balay 
2049b5e25fSSatish Balay   PetscFunctionBegin;
2149b5e25fSSatish Balay   m     = a->mbs;
2249b5e25fSSatish Balay   ai    = a->i;
2349b5e25fSSatish Balay   aj    = a->j;
2449b5e25fSSatish Balay   bs    = a->bs;
2549b5e25fSSatish Balay 
2649b5e25fSSatish Balay   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative overlap specified");
2749b5e25fSSatish Balay 
2849b5e25fSSatish Balay   ierr  = PetscBTCreate(m,table);CHKERRQ(ierr);
2949b5e25fSSatish Balay   nidx  = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(nidx);
3049b5e25fSSatish Balay   nidx2 = (int *)PetscMalloc((a->m+1)*sizeof(int));CHKPTRQ(nidx2);
3149b5e25fSSatish Balay 
3249b5e25fSSatish Balay   for (i=0; i<is_max; i++) {
3349b5e25fSSatish Balay     /* Initialise the two local arrays */
3449b5e25fSSatish Balay     isz  = 0;
3549b5e25fSSatish Balay     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
3649b5e25fSSatish Balay 
3749b5e25fSSatish Balay     /* Extract the indices, assume there can be duplicate entries */
3849b5e25fSSatish Balay     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
3949b5e25fSSatish Balay     ierr = ISGetSize(is[i],&n);CHKERRQ(ierr);
4049b5e25fSSatish Balay 
4149b5e25fSSatish Balay     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
4249b5e25fSSatish Balay     for (j=0; j<n ; ++j){
4349b5e25fSSatish Balay       ival = idx[j]/bs; /* convert the indices into block indices */
4449b5e25fSSatish Balay       if (ival>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"index greater than mat-dim");
4549b5e25fSSatish Balay       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
4649b5e25fSSatish Balay     }
4749b5e25fSSatish Balay     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
4849b5e25fSSatish Balay     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
4949b5e25fSSatish Balay     k = 0;
5049b5e25fSSatish Balay     for (j=0; j<ov; j++){ /* for each overlap*/
5149b5e25fSSatish Balay       n = isz;
5249b5e25fSSatish Balay       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
5349b5e25fSSatish Balay         row   = nidx[k];
5449b5e25fSSatish Balay         start = ai[row];
5549b5e25fSSatish Balay         end   = ai[row+1];
5649b5e25fSSatish Balay         for (l = start; l<end ; l++){
5749b5e25fSSatish Balay           val = aj[l];
5849b5e25fSSatish Balay           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
5949b5e25fSSatish Balay         }
6049b5e25fSSatish Balay       }
6149b5e25fSSatish Balay     }
6249b5e25fSSatish Balay     /* expand the Index Set */
6349b5e25fSSatish Balay     for (j=0; j<isz; j++) {
6449b5e25fSSatish Balay       for (k=0; k<bs; k++)
6549b5e25fSSatish Balay         nidx2[j*bs+k] = nidx[j]*bs+k;
6649b5e25fSSatish Balay     }
6749b5e25fSSatish Balay     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
6849b5e25fSSatish Balay   }
6949b5e25fSSatish Balay   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
7049b5e25fSSatish Balay   ierr = PetscFree(nidx);CHKERRQ(ierr);
7149b5e25fSSatish Balay   ierr = PetscFree(nidx2);CHKERRQ(ierr);
7249b5e25fSSatish Balay   PetscFunctionReturn(0);
7349b5e25fSSatish Balay }
7449b5e25fSSatish Balay 
7549b5e25fSSatish Balay #undef __FUNC__
7649b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private"
7749b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
7849b5e25fSSatish Balay {
7949b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
8049b5e25fSSatish Balay   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
8149b5e25fSSatish Balay   int          row,mat_i,*mat_j,tcol,*mat_ilen;
8249b5e25fSSatish Balay   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
8349b5e25fSSatish Balay   int          *aj = a->j,*ai = a->i;
8449b5e25fSSatish Balay   MatScalar    *mat_a;
8549b5e25fSSatish Balay   Mat          C;
8649b5e25fSSatish Balay   PetscTruth   flag;
8749b5e25fSSatish Balay 
8849b5e25fSSatish Balay   PetscFunctionBegin;
8949b5e25fSSatish Balay 
9049b5e25fSSatish Balay   if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
9149b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
9249b5e25fSSatish Balay   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IS is not sorted");
9349b5e25fSSatish Balay 
9449b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
9549b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
9649b5e25fSSatish Balay 
9749b5e25fSSatish Balay   smap  = (int*)PetscMalloc((1+oldcols)*sizeof(int));CHKPTRQ(smap);
9849b5e25fSSatish Balay   ssmap = smap;
9949b5e25fSSatish Balay   lens  = (int*)PetscMalloc((1+nrows)*sizeof(int));CHKPTRQ(lens);
10049b5e25fSSatish Balay   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
10149b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
10249b5e25fSSatish Balay   /* determine lens of each row */
10349b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
10449b5e25fSSatish Balay     kstart  = ai[irow[i]];
10549b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
10649b5e25fSSatish Balay     lens[i] = 0;
10749b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
10849b5e25fSSatish Balay         if (ssmap[aj[k]]) {
10949b5e25fSSatish Balay           lens[i]++;
11049b5e25fSSatish Balay         }
11149b5e25fSSatish Balay       }
11249b5e25fSSatish Balay     }
11349b5e25fSSatish Balay   /* Create and fill new matrix */
11449b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
11549b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
11649b5e25fSSatish Balay 
11749b5e25fSSatish Balay     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Submatrix wrong size");
11849b5e25fSSatish Balay     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
11949b5e25fSSatish Balay     if (flag == PETSC_FALSE) {
12049b5e25fSSatish Balay       SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong no of nonzeros");
12149b5e25fSSatish Balay     }
12249b5e25fSSatish Balay     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
12349b5e25fSSatish Balay     C = *B;
12449b5e25fSSatish Balay   } else {
12549b5e25fSSatish Balay     ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr);
12649b5e25fSSatish Balay   }
12749b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
12849b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12949b5e25fSSatish Balay     row    = irow[i];
13049b5e25fSSatish Balay     kstart = ai[row];
13149b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
13249b5e25fSSatish Balay     mat_i  = c->i[i];
13349b5e25fSSatish Balay     mat_j  = c->j + mat_i;
13449b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
13549b5e25fSSatish Balay     mat_ilen = c->ilen + i;
13649b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
13749b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
13849b5e25fSSatish Balay         *mat_j++ = tcol - 1;
13949b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
14049b5e25fSSatish Balay         mat_a   += bs2;
14149b5e25fSSatish Balay         (*mat_ilen)++;
14249b5e25fSSatish Balay       }
14349b5e25fSSatish Balay     }
14449b5e25fSSatish Balay   }
14549b5e25fSSatish Balay 
14649b5e25fSSatish Balay   /* Free work space */
14749b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
14849b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
14949b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15049b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15149b5e25fSSatish Balay 
15249b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
15349b5e25fSSatish Balay   *B = C;
15449b5e25fSSatish Balay   PetscFunctionReturn(0);
15549b5e25fSSatish Balay }
15649b5e25fSSatish Balay 
15749b5e25fSSatish Balay #undef __FUNC__
15849b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ"
15949b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
16049b5e25fSSatish Balay {
16149b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
16249b5e25fSSatish Balay   IS          is1;
16349b5e25fSSatish Balay   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;
16449b5e25fSSatish Balay 
16549b5e25fSSatish Balay   PetscFunctionBegin;
16649b5e25fSSatish Balay   if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
16749b5e25fSSatish Balay 
16849b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
17049b5e25fSSatish Balay 
17149b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
17249b5e25fSSatish Balay    and form the IS with compressed IS */
17349b5e25fSSatish Balay   vary = (int*)PetscMalloc(2*(a->mbs+1)*sizeof(int));CHKPTRQ(vary);
17449b5e25fSSatish Balay   iary = vary + a->mbs;
17549b5e25fSSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
17649b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
17749b5e25fSSatish Balay 
17849b5e25fSSatish Balay   count = 0;
17949b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
18049b5e25fSSatish Balay     if (vary[i]!=0 && vary[i]!=bs) SETERRA(1,0,"Index set does not match blocks");
18149b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
18249b5e25fSSatish Balay   }
18349b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
18449b5e25fSSatish Balay 
18549b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
18649b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
18749b5e25fSSatish Balay 
18849b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
18949b5e25fSSatish Balay   ISDestroy(is1);
19049b5e25fSSatish Balay   PetscFunctionReturn(0);
19149b5e25fSSatish Balay }
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay #undef __FUNC__
19449b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ"
19549b5e25fSSatish Balay int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B)
19649b5e25fSSatish Balay {
19749b5e25fSSatish Balay   int ierr,i;
19849b5e25fSSatish Balay 
19949b5e25fSSatish Balay   PetscFunctionBegin;
20049b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
20149b5e25fSSatish Balay     *B = (Mat*)PetscMalloc((n+1)*sizeof(Mat));CHKPTRQ(*B);
20249b5e25fSSatish Balay   }
20349b5e25fSSatish Balay 
20449b5e25fSSatish Balay   for (i=0; i<n; i++) {
20549b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
20649b5e25fSSatish Balay   }
20749b5e25fSSatish Balay   PetscFunctionReturn(0);
20849b5e25fSSatish Balay }
20949b5e25fSSatish Balay 
21049b5e25fSSatish Balay /* -------------------------------------------------------*/
21149b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
21249b5e25fSSatish Balay /* -------------------------------------------------------*/
213*d9eff348SSatish Balay #include "petscblaslapack.h"
21449b5e25fSSatish Balay 
21549b5e25fSSatish Balay #undef __FUNC__
21649b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_1"
21749b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
21849b5e25fSSatish Balay {
21949b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
22049b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,zero=0.0;
22149b5e25fSSatish Balay   MatScalar       *v;
22249b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
22349b5e25fSSatish Balay 
22449b5e25fSSatish Balay   PetscFunctionBegin;
22549b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
22649b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
22749b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
22849b5e25fSSatish Balay 
22949b5e25fSSatish Balay   v  = a->a;
23049b5e25fSSatish Balay   xb = x;
23149b5e25fSSatish Balay 
23249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
23349b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
23449b5e25fSSatish Balay     x1 = xb[0];
23549b5e25fSSatish Balay     ib = aj + *ai;
23649b5e25fSSatish Balay     z[i] += *v++ * x[*ib++];   /* (diag of A)*x */
23749b5e25fSSatish Balay     for (j=1; j<n; j++) {
23849b5e25fSSatish Balay       cval    = *ib;
23949b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
24049b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
24149b5e25fSSatish Balay     }
24249b5e25fSSatish Balay     xb++; ai++;
24349b5e25fSSatish Balay   }
24449b5e25fSSatish Balay 
24549b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
24649b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
24749b5e25fSSatish Balay   PLogFlops(2*(a->s_nz*2 - a->m) - a->m);  /* s_nz = (nz+m)/2 */
24849b5e25fSSatish Balay   PetscFunctionReturn(0);
24949b5e25fSSatish Balay }
25049b5e25fSSatish Balay 
25149b5e25fSSatish Balay #undef __FUNC__
25249b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_2"
25349b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
25449b5e25fSSatish Balay {
25549b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
25649b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,zero=0.0;
25749b5e25fSSatish Balay   MatScalar       *v;
25849b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
25949b5e25fSSatish Balay 
26049b5e25fSSatish Balay 
26149b5e25fSSatish Balay   PetscFunctionBegin;
26249b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
26349b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
26449b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
26549b5e25fSSatish Balay 
26649b5e25fSSatish Balay   v     = a->a;
26749b5e25fSSatish Balay   xb = x;
26849b5e25fSSatish Balay 
26949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
27049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
27149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
27249b5e25fSSatish Balay     ib = aj + *ai;
27349b5e25fSSatish Balay     /* (diag of A)*x */
27449b5e25fSSatish Balay     z[2*i]   += v[0]*x1 + v[2]*x2;
27549b5e25fSSatish Balay     z[2*i+1] += v[2]*x1 + v[3]*x2;
27649b5e25fSSatish Balay     v += 4;
27749b5e25fSSatish Balay     for (j=1; j<n; j++) {
27849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
27949b5e25fSSatish Balay       cval       = ib[j]*2;
28049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
28149b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
28249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
28349b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
28449b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
28549b5e25fSSatish Balay       v  += 4;
28649b5e25fSSatish Balay     }
28749b5e25fSSatish Balay     xb +=2; ai++;
28849b5e25fSSatish Balay   }
28949b5e25fSSatish Balay 
29049b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
29149b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
29249b5e25fSSatish Balay   PLogFlops(8*(a->s_nz*2 - a->m) - a->m);
29349b5e25fSSatish Balay   PetscFunctionReturn(0);
29449b5e25fSSatish Balay }
29549b5e25fSSatish Balay 
29649b5e25fSSatish Balay #undef __FUNC__
29749b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_3"
29849b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
29949b5e25fSSatish Balay {
30049b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
30149b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,zero=0.0;
30249b5e25fSSatish Balay   MatScalar       *v;
30349b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
30449b5e25fSSatish Balay 
30549b5e25fSSatish Balay 
30649b5e25fSSatish Balay   PetscFunctionBegin;
30749b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
30849b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
30949b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
31049b5e25fSSatish Balay 
31149b5e25fSSatish Balay   v     = a->a;
31249b5e25fSSatish Balay   xb = x;
31349b5e25fSSatish Balay 
31449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
31549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
31649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
31749b5e25fSSatish Balay     ib = aj + *ai;
31849b5e25fSSatish Balay     /* (diag of A)*x */
31949b5e25fSSatish Balay     z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
32049b5e25fSSatish Balay     z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
32149b5e25fSSatish Balay     z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
32249b5e25fSSatish Balay     v += 9;
32349b5e25fSSatish Balay     for (j=1; j<n; j++) {
32449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
32549b5e25fSSatish Balay       cval       = ib[j]*3;
32649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
32749b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
32849b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
32949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
33049b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
33149b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
33249b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
33349b5e25fSSatish Balay       v  += 9;
33449b5e25fSSatish Balay     }
33549b5e25fSSatish Balay     xb +=3; ai++;
33649b5e25fSSatish Balay   }
33749b5e25fSSatish Balay 
33849b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
33949b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
34049b5e25fSSatish Balay   PLogFlops(18*(a->s_nz*2 - a->m) - a->m);
34149b5e25fSSatish Balay   PetscFunctionReturn(0);
34249b5e25fSSatish Balay }
34349b5e25fSSatish Balay 
34449b5e25fSSatish Balay #undef __FUNC__
34549b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_4"
34649b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
34749b5e25fSSatish Balay {
34849b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
34949b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
35049b5e25fSSatish Balay   MatScalar       *v;
35149b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
35249b5e25fSSatish Balay 
35349b5e25fSSatish Balay   PetscFunctionBegin;
35449b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
35549b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
35649b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35749b5e25fSSatish Balay 
35849b5e25fSSatish Balay   v     = a->a;
35949b5e25fSSatish Balay   xb = x;
36049b5e25fSSatish Balay 
36149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
36249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
36349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
36449b5e25fSSatish Balay     ib = aj + *ai;
36549b5e25fSSatish Balay     /* (diag of A)*x */
36649b5e25fSSatish Balay     z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36749b5e25fSSatish Balay     z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
36849b5e25fSSatish Balay     z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
36949b5e25fSSatish Balay     z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
37049b5e25fSSatish Balay     v += 16;
37149b5e25fSSatish Balay     for (j=1; j<n; j++) {
37249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37349b5e25fSSatish Balay       cval       = ib[j]*4;
37449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
37549b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
37649b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
37749b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
37849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
37949b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
38049b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
38149b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
38249b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38349b5e25fSSatish Balay       v  += 16;
38449b5e25fSSatish Balay     }
38549b5e25fSSatish Balay     xb +=4; ai++;
38649b5e25fSSatish Balay   }
38749b5e25fSSatish Balay 
38849b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
38949b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
39049b5e25fSSatish Balay   PLogFlops(32*(a->s_nz*2 - a->m) - a->m);
39149b5e25fSSatish Balay   PetscFunctionReturn(0);
39249b5e25fSSatish Balay }
39349b5e25fSSatish Balay 
39449b5e25fSSatish Balay #undef __FUNC__
39549b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_5"
39649b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
39749b5e25fSSatish Balay {
39849b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
39949b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
40049b5e25fSSatish Balay   MatScalar       *v;
40149b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
40249b5e25fSSatish Balay 
40349b5e25fSSatish Balay   PetscFunctionBegin;
40449b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
40549b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
40649b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
40749b5e25fSSatish Balay 
40849b5e25fSSatish Balay   v     = a->a;
40949b5e25fSSatish Balay   xb = x;
41049b5e25fSSatish Balay 
41149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
41349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
41449b5e25fSSatish Balay     ib = aj + *ai;
41549b5e25fSSatish Balay     /* (diag of A)*x */
41649b5e25fSSatish Balay     z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
41749b5e25fSSatish Balay     z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
41849b5e25fSSatish Balay     z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
41949b5e25fSSatish Balay     z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42049b5e25fSSatish Balay     z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
42149b5e25fSSatish Balay     v += 25;
42249b5e25fSSatish Balay     for (j=1; j<n; j++) {
42349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
42449b5e25fSSatish Balay       cval       = ib[j]*5;
42549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
42649b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
42749b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
42849b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
42949b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
43049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
43149b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
43249b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
43349b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
43449b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
43549b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
43649b5e25fSSatish Balay       v  += 25;
43749b5e25fSSatish Balay     }
43849b5e25fSSatish Balay     xb +=5; ai++;
43949b5e25fSSatish Balay   }
44049b5e25fSSatish Balay 
44149b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
44249b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
44349b5e25fSSatish Balay   PLogFlops(50*(a->s_nz*2 - a->m) - a->m);
44449b5e25fSSatish Balay   PetscFunctionReturn(0);
44549b5e25fSSatish Balay }
44649b5e25fSSatish Balay 
44749b5e25fSSatish Balay 
44849b5e25fSSatish Balay #undef __FUNC__
44949b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_6"
45049b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
45149b5e25fSSatish Balay {
45249b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
45349b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
45449b5e25fSSatish Balay   MatScalar       *v;
45549b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
45649b5e25fSSatish Balay 
45749b5e25fSSatish Balay   PetscFunctionBegin;
45849b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
45949b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
46049b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
46149b5e25fSSatish Balay 
46249b5e25fSSatish Balay   v     = a->a;
46349b5e25fSSatish Balay   xb = x;
46449b5e25fSSatish Balay 
46549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
46649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
46749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
46849b5e25fSSatish Balay     ib = aj + *ai;
46949b5e25fSSatish Balay     /* (diag of A)*x */
47049b5e25fSSatish Balay     z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
47149b5e25fSSatish Balay     z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
47249b5e25fSSatish Balay     z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
47349b5e25fSSatish Balay     z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
47449b5e25fSSatish Balay     z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
47549b5e25fSSatish Balay     z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
47649b5e25fSSatish Balay     v += 36;
47749b5e25fSSatish Balay     for (j=1; j<n; j++) {
47849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
47949b5e25fSSatish Balay       cval       = ib[j]*6;
48049b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
48149b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
48249b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
48349b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
48449b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
48549b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
48649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
48749b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
48849b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
48949b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
49049b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
49149b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
49249b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
49349b5e25fSSatish Balay       v  += 36;
49449b5e25fSSatish Balay     }
49549b5e25fSSatish Balay     xb +=6; ai++;
49649b5e25fSSatish Balay   }
49749b5e25fSSatish Balay 
49849b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
49949b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
50049b5e25fSSatish Balay   PLogFlops(72*(a->s_nz*2 - a->m) - a->m);
50149b5e25fSSatish Balay   PetscFunctionReturn(0);
50249b5e25fSSatish Balay }
50349b5e25fSSatish Balay #undef __FUNC__
50449b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_7"
50549b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
50649b5e25fSSatish Balay {
50749b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
50849b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
50949b5e25fSSatish Balay   MatScalar       *v;
51049b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
51149b5e25fSSatish Balay 
51249b5e25fSSatish Balay   PetscFunctionBegin;
51349b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
51449b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
51549b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
51649b5e25fSSatish Balay 
51749b5e25fSSatish Balay   v     = a->a;
51849b5e25fSSatish Balay   xb = x;
51949b5e25fSSatish Balay 
52049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
52149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
52249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
52349b5e25fSSatish Balay     ib = aj + *ai;
52449b5e25fSSatish Balay     /* (diag of A)*x */
52549b5e25fSSatish Balay     z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
52649b5e25fSSatish Balay     z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
52749b5e25fSSatish Balay     z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
52849b5e25fSSatish Balay     z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
52949b5e25fSSatish Balay     z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
53049b5e25fSSatish Balay     z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
53149b5e25fSSatish Balay     z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
53249b5e25fSSatish Balay     v += 49;
53349b5e25fSSatish Balay     for (j=1; j<n; j++) {
53449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
53549b5e25fSSatish Balay       cval       = ib[j]*7;
53649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
53749b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
53849b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
53949b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
54049b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
54149b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
54249b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
54349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
54449b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
54549b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
54649b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
54749b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
54849b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
54949b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
55049b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
55149b5e25fSSatish Balay       v  += 49;
55249b5e25fSSatish Balay     }
55349b5e25fSSatish Balay     xb +=7; ai++;
55449b5e25fSSatish Balay   }
55549b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
55649b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
55749b5e25fSSatish Balay   PLogFlops(98*(a->s_nz*2 - a->m) - a->m);
55849b5e25fSSatish Balay   PetscFunctionReturn(0);
55949b5e25fSSatish Balay }
56049b5e25fSSatish Balay 
56149b5e25fSSatish Balay /*
56249b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
56349b5e25fSSatish Balay */
56449b5e25fSSatish Balay #undef __FUNC__
56549b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_N"
56649b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
56749b5e25fSSatish Balay {
56849b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
569066653e3SSatish Balay   Scalar          *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
570066653e3SSatish Balay   MatScalar       *v;
571066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
572066653e3SSatish Balay   int             ncols,k;
57349b5e25fSSatish Balay 
57449b5e25fSSatish Balay   PetscFunctionBegin;
57549b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
57649b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
57749b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
57849b5e25fSSatish Balay 
57949b5e25fSSatish Balay   aj   = a->j;
58049b5e25fSSatish Balay   v    = a->a;
58149b5e25fSSatish Balay   ii   = a->i;
58249b5e25fSSatish Balay 
58349b5e25fSSatish Balay   if (!a->mult_work) {
58449b5e25fSSatish Balay     k = a->m;
58549b5e25fSSatish Balay     a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
58649b5e25fSSatish Balay   }
58749b5e25fSSatish Balay   work = a->mult_work;
58849b5e25fSSatish Balay 
58949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
59049b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
59149b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
59249b5e25fSSatish Balay 
59349b5e25fSSatish Balay     /* upper triangular part */
59449b5e25fSSatish Balay     for (j=0; j<n; j++) {
595066653e3SSatish Balay       /* col = bs*(*idx); */
59649b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
59749b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
59849b5e25fSSatish Balay       workt += bs;
59949b5e25fSSatish Balay     }
60049b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
60149b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
60249b5e25fSSatish Balay 
60349b5e25fSSatish Balay     /* strict lower triangular part */
60449b5e25fSSatish Balay     ncols -= bs;
60549b5e25fSSatish Balay     if (ncols > 0){
60649b5e25fSSatish Balay       workt = work;
60749b5e25fSSatish Balay       ierr  = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr);
60849b5e25fSSatish Balay       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt);
60949b5e25fSSatish Balay 
61049b5e25fSSatish Balay       idx=aj+ii[0]+1;
61149b5e25fSSatish Balay       for (j=1; j<n; j++) {
61249b5e25fSSatish Balay         zb = z_ptr + bs*(*idx);
61349b5e25fSSatish Balay         idx++;
61449b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
61549b5e25fSSatish Balay         workt += bs;
61649b5e25fSSatish Balay       }
61749b5e25fSSatish Balay     }
61849b5e25fSSatish Balay 
61949b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
62049b5e25fSSatish Balay   }
62149b5e25fSSatish Balay 
62249b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
62349b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
62449b5e25fSSatish Balay   PLogFlops(2*(a->s_nz*2 - a->m)*bs2 - a->m);
62549b5e25fSSatish Balay   PetscFunctionReturn(0);
62649b5e25fSSatish Balay }
62749b5e25fSSatish Balay 
62849b5e25fSSatish Balay #undef __FUNC__
62949b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_1"
63049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
63149b5e25fSSatish Balay {
63249b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
63349b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1;
63449b5e25fSSatish Balay   MatScalar       *v;
63549b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
63649b5e25fSSatish Balay 
63749b5e25fSSatish Balay   PetscFunctionBegin;
63849b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
63949b5e25fSSatish Balay   if (yy != xx) {
64049b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
64149b5e25fSSatish Balay   } else {
64249b5e25fSSatish Balay     y = x;
64349b5e25fSSatish Balay   }
64449b5e25fSSatish Balay   if (zz != yy) {
64549b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
64649b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
64749b5e25fSSatish Balay   } else {
64849b5e25fSSatish Balay     z = y;
64949b5e25fSSatish Balay   }
65049b5e25fSSatish Balay 
65149b5e25fSSatish Balay   v  = a->a;
65249b5e25fSSatish Balay   xb = x;
65349b5e25fSSatish Balay 
65449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
65549b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
65649b5e25fSSatish Balay     x1 = xb[0];
65749b5e25fSSatish Balay     ib = aj + *ai;
65849b5e25fSSatish Balay     z[i] += *v++ * x[*ib++];   /* (diag of A)*x */
65949b5e25fSSatish Balay     for (j=1; j<n; j++) {
66049b5e25fSSatish Balay       cval    = *ib;
66149b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
66249b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
66349b5e25fSSatish Balay     }
66449b5e25fSSatish Balay     xb++; ai++;
66549b5e25fSSatish Balay   }
66649b5e25fSSatish Balay 
66749b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
66849b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
66949b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
67049b5e25fSSatish Balay 
67149b5e25fSSatish Balay   PLogFlops(2*(a->s_nz*2 - a->m));
67249b5e25fSSatish Balay   PetscFunctionReturn(0);
67349b5e25fSSatish Balay }
67449b5e25fSSatish Balay 
67549b5e25fSSatish Balay #undef __FUNC__
67649b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_2"
67749b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
67849b5e25fSSatish Balay {
67949b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
68049b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2;
68149b5e25fSSatish Balay   MatScalar       *v;
68249b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
68349b5e25fSSatish Balay 
68449b5e25fSSatish Balay   PetscFunctionBegin;
68549b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
68649b5e25fSSatish Balay   if (yy != xx) {
68749b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
68849b5e25fSSatish Balay   } else {
68949b5e25fSSatish Balay     y = x;
69049b5e25fSSatish Balay   }
69149b5e25fSSatish Balay   if (zz != yy) {
69249b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
69349b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
69449b5e25fSSatish Balay   } else {
69549b5e25fSSatish Balay     z = y;
69649b5e25fSSatish Balay   }
69749b5e25fSSatish Balay 
69849b5e25fSSatish Balay   v     = a->a;
69949b5e25fSSatish Balay   xb = x;
70049b5e25fSSatish Balay 
70149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
70349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
70449b5e25fSSatish Balay     ib = aj + *ai;
70549b5e25fSSatish Balay     /* (diag of A)*x */
70649b5e25fSSatish Balay     z[2*i]   += v[0]*x1 + v[2]*x2;
70749b5e25fSSatish Balay     z[2*i+1] += v[2]*x1 + v[3]*x2;
70849b5e25fSSatish Balay     v += 4;
70949b5e25fSSatish Balay     for (j=1; j<n; j++) {
71049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
71149b5e25fSSatish Balay       cval       = ib[j]*2;
71249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
71349b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
71449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
71549b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
71649b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
71749b5e25fSSatish Balay       v  += 4;
71849b5e25fSSatish Balay     }
71949b5e25fSSatish Balay     xb +=2; ai++;
72049b5e25fSSatish Balay   }
72149b5e25fSSatish Balay 
72249b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
72349b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
72449b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
72549b5e25fSSatish Balay 
72649b5e25fSSatish Balay   PLogFlops(4*(a->s_nz*2 - a->m));
72749b5e25fSSatish Balay   PetscFunctionReturn(0);
72849b5e25fSSatish Balay }
72949b5e25fSSatish Balay 
73049b5e25fSSatish Balay #undef __FUNC__
73149b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_3"
73249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
73349b5e25fSSatish Balay {
73449b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
73549b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3;
73649b5e25fSSatish Balay   MatScalar       *v;
73749b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
73849b5e25fSSatish Balay 
73949b5e25fSSatish Balay   PetscFunctionBegin;
74049b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
74149b5e25fSSatish Balay   if (yy != xx) {
74249b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
74349b5e25fSSatish Balay   } else {
74449b5e25fSSatish Balay     y = x;
74549b5e25fSSatish Balay   }
74649b5e25fSSatish Balay   if (zz != yy) {
74749b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
74849b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
74949b5e25fSSatish Balay   } else {
75049b5e25fSSatish Balay     z = y;
75149b5e25fSSatish Balay   }
75249b5e25fSSatish Balay 
75349b5e25fSSatish Balay   v     = a->a;
75449b5e25fSSatish Balay   xb = x;
75549b5e25fSSatish Balay 
75649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
75749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
75849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
75949b5e25fSSatish Balay     ib = aj + *ai;
76049b5e25fSSatish Balay     /* (diag of A)*x */
76149b5e25fSSatish Balay     z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
76249b5e25fSSatish Balay     z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
76349b5e25fSSatish Balay     z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
76449b5e25fSSatish Balay     v += 9;
76549b5e25fSSatish Balay     for (j=1; j<n; j++) {
76649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
76749b5e25fSSatish Balay       cval       = ib[j]*3;
76849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
76949b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
77049b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
77149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
77249b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
77349b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
77449b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
77549b5e25fSSatish Balay       v  += 9;
77649b5e25fSSatish Balay     }
77749b5e25fSSatish Balay     xb +=3; ai++;
77849b5e25fSSatish Balay   }
77949b5e25fSSatish Balay 
78049b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
78149b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
78249b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
78349b5e25fSSatish Balay 
78449b5e25fSSatish Balay   PLogFlops(18*(a->s_nz*2 - a->m));
78549b5e25fSSatish Balay   PetscFunctionReturn(0);
78649b5e25fSSatish Balay }
78749b5e25fSSatish Balay 
78849b5e25fSSatish Balay #undef __FUNC__
78949b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_4"
79049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
79149b5e25fSSatish Balay {
79249b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
79349b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4;
79449b5e25fSSatish Balay   MatScalar       *v;
79549b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
79649b5e25fSSatish Balay 
79749b5e25fSSatish Balay   PetscFunctionBegin;
79849b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
79949b5e25fSSatish Balay   if (yy != xx) {
80049b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
80149b5e25fSSatish Balay   } else {
80249b5e25fSSatish Balay     y = x;
80349b5e25fSSatish Balay   }
80449b5e25fSSatish Balay   if (zz != yy) {
80549b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
80649b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
80749b5e25fSSatish Balay   } else {
80849b5e25fSSatish Balay     z = y;
80949b5e25fSSatish Balay   }
81049b5e25fSSatish Balay 
81149b5e25fSSatish Balay   v     = a->a;
81249b5e25fSSatish Balay   xb = x;
81349b5e25fSSatish Balay 
81449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
81549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
81649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
81749b5e25fSSatish Balay     ib = aj + *ai;
81849b5e25fSSatish Balay     /* (diag of A)*x */
81949b5e25fSSatish Balay     z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
82049b5e25fSSatish Balay     z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
82149b5e25fSSatish Balay     z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
82249b5e25fSSatish Balay     z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
82349b5e25fSSatish Balay     v += 16;
82449b5e25fSSatish Balay     for (j=1; j<n; j++) {
82549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
82649b5e25fSSatish Balay       cval       = ib[j]*4;
82749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
82849b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
82949b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
83049b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
83149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
83249b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
83349b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
83449b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
83549b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
83649b5e25fSSatish Balay       v  += 16;
83749b5e25fSSatish Balay     }
83849b5e25fSSatish Balay     xb +=4; ai++;
83949b5e25fSSatish Balay   }
84049b5e25fSSatish Balay 
84149b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
84249b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
84349b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
84449b5e25fSSatish Balay 
84549b5e25fSSatish Balay   PLogFlops(32*(a->s_nz*2 - a->m));
84649b5e25fSSatish Balay   PetscFunctionReturn(0);
84749b5e25fSSatish Balay }
84849b5e25fSSatish Balay 
84949b5e25fSSatish Balay #undef __FUNC__
85049b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_5"
85149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
85249b5e25fSSatish Balay {
85349b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
85449b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5;
85549b5e25fSSatish Balay   MatScalar       *v;
85649b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
85749b5e25fSSatish Balay 
85849b5e25fSSatish Balay   PetscFunctionBegin;
85949b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
86049b5e25fSSatish Balay   if (yy != xx) {
86149b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
86249b5e25fSSatish Balay   } else {
86349b5e25fSSatish Balay     y = x;
86449b5e25fSSatish Balay   }
86549b5e25fSSatish Balay   if (zz != yy) {
86649b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
86749b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
86849b5e25fSSatish Balay   } else {
86949b5e25fSSatish Balay     z = y;
87049b5e25fSSatish Balay   }
87149b5e25fSSatish Balay 
87249b5e25fSSatish Balay   v     = a->a;
87349b5e25fSSatish Balay   xb = x;
87449b5e25fSSatish Balay 
87549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
87649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
87749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
87849b5e25fSSatish Balay     ib = aj + *ai;
87949b5e25fSSatish Balay     /* (diag of A)*x */
88049b5e25fSSatish Balay     z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
88149b5e25fSSatish Balay     z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
88249b5e25fSSatish Balay     z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
88349b5e25fSSatish Balay     z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
88449b5e25fSSatish Balay     z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
88549b5e25fSSatish Balay     v += 25;
88649b5e25fSSatish Balay     for (j=1; j<n; j++) {
88749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
88849b5e25fSSatish Balay       cval       = ib[j]*5;
88949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
89049b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
89149b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
89249b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
89349b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
89449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
89549b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
89649b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
89749b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
89849b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
89949b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
90049b5e25fSSatish Balay       v  += 25;
90149b5e25fSSatish Balay     }
90249b5e25fSSatish Balay     xb +=5; ai++;
90349b5e25fSSatish Balay   }
90449b5e25fSSatish Balay 
90549b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
90649b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
90749b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
90849b5e25fSSatish Balay 
90949b5e25fSSatish Balay   PLogFlops(50*(a->s_nz*2 - a->m));
91049b5e25fSSatish Balay   PetscFunctionReturn(0);
91149b5e25fSSatish Balay }
91249b5e25fSSatish Balay #undef __FUNC__
91349b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_6"
91449b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
91549b5e25fSSatish Balay {
91649b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
91749b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
91849b5e25fSSatish Balay   MatScalar       *v;
91949b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
92049b5e25fSSatish Balay 
92149b5e25fSSatish Balay   PetscFunctionBegin;
92249b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
92349b5e25fSSatish Balay   if (yy != xx) {
92449b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
92549b5e25fSSatish Balay   } else {
92649b5e25fSSatish Balay     y = x;
92749b5e25fSSatish Balay   }
92849b5e25fSSatish Balay   if (zz != yy) {
92949b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
93049b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
93149b5e25fSSatish Balay   } else {
93249b5e25fSSatish Balay     z = y;
93349b5e25fSSatish Balay   }
93449b5e25fSSatish Balay 
93549b5e25fSSatish Balay   v     = a->a;
93649b5e25fSSatish Balay   xb = x;
93749b5e25fSSatish Balay 
93849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
93949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
94049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
94149b5e25fSSatish Balay     ib = aj + *ai;
94249b5e25fSSatish Balay     /* (diag of A)*x */
94349b5e25fSSatish Balay     z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
94449b5e25fSSatish Balay     z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
94549b5e25fSSatish Balay     z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
94649b5e25fSSatish Balay     z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
94749b5e25fSSatish Balay     z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
94849b5e25fSSatish Balay     z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
94949b5e25fSSatish Balay     v += 36;
95049b5e25fSSatish Balay     for (j=1; j<n; j++) {
95149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
95249b5e25fSSatish Balay       cval       = ib[j]*6;
95349b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
95449b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
95549b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
95649b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
95749b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
95849b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
95949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96049b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
96149b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
96249b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
96349b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
96449b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
96549b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
96649b5e25fSSatish Balay       v  += 36;
96749b5e25fSSatish Balay     }
96849b5e25fSSatish Balay     xb +=6; ai++;
96949b5e25fSSatish Balay   }
97049b5e25fSSatish Balay 
97149b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
97249b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
97349b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
97449b5e25fSSatish Balay 
97549b5e25fSSatish Balay   PLogFlops(72*(a->s_nz*2 - a->m));
97649b5e25fSSatish Balay   PetscFunctionReturn(0);
97749b5e25fSSatish Balay }
97849b5e25fSSatish Balay 
97949b5e25fSSatish Balay #undef __FUNC__
98049b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_7"
98149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
98249b5e25fSSatish Balay {
98349b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
98449b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
98549b5e25fSSatish Balay   MatScalar       *v;
98649b5e25fSSatish Balay   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j;
98749b5e25fSSatish Balay 
98849b5e25fSSatish Balay   PetscFunctionBegin;
98949b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
99049b5e25fSSatish Balay   if (yy != xx) {
99149b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
99249b5e25fSSatish Balay   } else {
99349b5e25fSSatish Balay     y = x;
99449b5e25fSSatish Balay   }
99549b5e25fSSatish Balay   if (zz != yy) {
99649b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
99749b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
99849b5e25fSSatish Balay   } else {
99949b5e25fSSatish Balay     z = y;
100049b5e25fSSatish Balay   }
100149b5e25fSSatish Balay 
100249b5e25fSSatish Balay   v     = a->a;
100349b5e25fSSatish Balay   xb = x;
100449b5e25fSSatish Balay 
100549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
100649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
100749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
100849b5e25fSSatish Balay     ib = aj + *ai;
100949b5e25fSSatish Balay     /* (diag of A)*x */
101049b5e25fSSatish Balay     z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
101149b5e25fSSatish Balay     z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
101249b5e25fSSatish Balay     z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
101349b5e25fSSatish Balay     z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
101449b5e25fSSatish Balay     z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
101549b5e25fSSatish Balay     z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
101649b5e25fSSatish Balay     z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
101749b5e25fSSatish Balay     v += 49;
101849b5e25fSSatish Balay     for (j=1; j<n; j++) {
101949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
102049b5e25fSSatish Balay       cval       = ib[j]*7;
102149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
102249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
102349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
102449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
102549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
102649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
102749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
102849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
102949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
103049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
103149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
103249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
103349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
103449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
103549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
103649b5e25fSSatish Balay       v  += 49;
103749b5e25fSSatish Balay     }
103849b5e25fSSatish Balay     xb +=7; ai++;
103949b5e25fSSatish Balay   }
104049b5e25fSSatish Balay 
104149b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
104249b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
104349b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
104449b5e25fSSatish Balay 
104549b5e25fSSatish Balay   PLogFlops(98*(a->s_nz*2 - a->m));
104649b5e25fSSatish Balay   PetscFunctionReturn(0);
104749b5e25fSSatish Balay }
104849b5e25fSSatish Balay 
104949b5e25fSSatish Balay #undef __FUNC__
105049b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_N"
105149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
105249b5e25fSSatish Balay {
105349b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
1054ecfa4421SSatish Balay   Scalar          *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1055066653e3SSatish Balay   MatScalar       *v;
1056066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1057066653e3SSatish Balay   int             ncols,k;
105849b5e25fSSatish Balay 
105949b5e25fSSatish Balay   PetscFunctionBegin;
106049b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
106149b5e25fSSatish Balay   if (yy != xx) {
106249b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
106349b5e25fSSatish Balay   } else {
106449b5e25fSSatish Balay     y = x;
106549b5e25fSSatish Balay   }
106649b5e25fSSatish Balay   if (zz != yy) {
106749b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
106849b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
106949b5e25fSSatish Balay   } else {
107049b5e25fSSatish Balay     z = y;
107149b5e25fSSatish Balay   }
107249b5e25fSSatish Balay 
107349b5e25fSSatish Balay   aj   = a->j;
107449b5e25fSSatish Balay   v    = a->a;
107549b5e25fSSatish Balay   ii   = a->i;
107649b5e25fSSatish Balay 
107749b5e25fSSatish Balay   if (!a->mult_work) {
107849b5e25fSSatish Balay     k = a->m;
107949b5e25fSSatish Balay     a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
108049b5e25fSSatish Balay   }
108149b5e25fSSatish Balay   work = a->mult_work;
108249b5e25fSSatish Balay 
108349b5e25fSSatish Balay 
108449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
108549b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
108649b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
108749b5e25fSSatish Balay 
108849b5e25fSSatish Balay     /* upper triangular part */
108949b5e25fSSatish Balay     for (j=0; j<n; j++) {
1090066653e3SSatish Balay       /* col = bs*(*idx); */
109149b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
109249b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
109349b5e25fSSatish Balay       workt += bs;
109449b5e25fSSatish Balay     }
109549b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
109649b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
109749b5e25fSSatish Balay 
109849b5e25fSSatish Balay     /* strict lower triangular part */
109949b5e25fSSatish Balay     ncols -= bs;
110049b5e25fSSatish Balay     if (ncols > 0){
110149b5e25fSSatish Balay       workt = work;
110249b5e25fSSatish Balay       ierr  = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr);
110349b5e25fSSatish Balay       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt);
110449b5e25fSSatish Balay 
110549b5e25fSSatish Balay       idx=aj+ii[0]+1;
110649b5e25fSSatish Balay       for (j=1; j<n; j++) {
110749b5e25fSSatish Balay         zb = z_ptr + bs*(*idx);
110849b5e25fSSatish Balay         idx++;
110949b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
111049b5e25fSSatish Balay         workt += bs;
111149b5e25fSSatish Balay       }
111249b5e25fSSatish Balay     }
111349b5e25fSSatish Balay 
111449b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
111549b5e25fSSatish Balay   }
111649b5e25fSSatish Balay 
111749b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
111849b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
111949b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
112049b5e25fSSatish Balay 
112149b5e25fSSatish Balay   PLogFlops(2*(a->s_nz*2 - a->m));
112249b5e25fSSatish Balay   PetscFunctionReturn(0);
112349b5e25fSSatish Balay }
112449b5e25fSSatish Balay 
112549b5e25fSSatish Balay #undef __FUNC__
112649b5e25fSSatish Balay #define __FUNC__ "MatMultTranspose_SeqSBAIJ"
112749b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
112849b5e25fSSatish Balay {
112949b5e25fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
113049b5e25fSSatish Balay   Scalar          *xg,*zg,*zb,zero = 0.0;
113149b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
113249b5e25fSSatish Balay   MatScalar       *v;
113349b5e25fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
113449b5e25fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
113549b5e25fSSatish Balay 
113649b5e25fSSatish Balay   PetscFunctionBegin;
113749b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
113849b5e25fSSatish Balay   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
113949b5e25fSSatish Balay   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
114049b5e25fSSatish Balay 
114149b5e25fSSatish Balay   idx   = a->j;
114249b5e25fSSatish Balay   v     = a->a;
114349b5e25fSSatish Balay   ii    = a->i;
114449b5e25fSSatish Balay   xb    = x;
114549b5e25fSSatish Balay   switch (bs) {
114649b5e25fSSatish Balay   case 1:
114749b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
114849b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
114949b5e25fSSatish Balay       x1 = xb[0];
115049b5e25fSSatish Balay       ib = idx + ai[i];
115149b5e25fSSatish Balay       for (j=0; j<n; j++) {
115249b5e25fSSatish Balay         rval    = ib[j];
115349b5e25fSSatish Balay         z[rval] += *v * x1;
115449b5e25fSSatish Balay         v++;
115549b5e25fSSatish Balay       }
115649b5e25fSSatish Balay       xb++;
115749b5e25fSSatish Balay     }
115849b5e25fSSatish Balay     break;
115949b5e25fSSatish Balay   case 2:
116049b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
116149b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
116249b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1];
116349b5e25fSSatish Balay       ib = idx + ai[i];
116449b5e25fSSatish Balay       for (j=0; j<n; j++) {
116549b5e25fSSatish Balay         rval      = ib[j]*2;
116649b5e25fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
116749b5e25fSSatish Balay         z[rval]   += v[2]*x1 + v[3]*x2;
116849b5e25fSSatish Balay         v  += 4;
116949b5e25fSSatish Balay       }
117049b5e25fSSatish Balay       xb += 2;
117149b5e25fSSatish Balay     }
117249b5e25fSSatish Balay     break;
117349b5e25fSSatish Balay   case 3:
117449b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
117549b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
117649b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
117749b5e25fSSatish Balay       ib = idx + ai[i];
117849b5e25fSSatish Balay       for (j=0; j<n; j++) {
117949b5e25fSSatish Balay         rval      = ib[j]*3;
118049b5e25fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
118149b5e25fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
118249b5e25fSSatish Balay         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
118349b5e25fSSatish Balay         v  += 9;
118449b5e25fSSatish Balay       }
118549b5e25fSSatish Balay       xb += 3;
118649b5e25fSSatish Balay     }
118749b5e25fSSatish Balay     break;
118849b5e25fSSatish Balay   case 4:
118949b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
119049b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
119149b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
119249b5e25fSSatish Balay       ib = idx + ai[i];
119349b5e25fSSatish Balay       for (j=0; j<n; j++) {
119449b5e25fSSatish Balay         rval      = ib[j]*4;
119549b5e25fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
119649b5e25fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
119749b5e25fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
119849b5e25fSSatish Balay         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
119949b5e25fSSatish Balay         v  += 16;
120049b5e25fSSatish Balay       }
120149b5e25fSSatish Balay       xb += 4;
120249b5e25fSSatish Balay     }
120349b5e25fSSatish Balay     break;
120449b5e25fSSatish Balay   case 5:
120549b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
120649b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
120749b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
120849b5e25fSSatish Balay       x4 = xb[3]; x5 = xb[4];
120949b5e25fSSatish Balay       ib = idx + ai[i];
121049b5e25fSSatish Balay       for (j=0; j<n; j++) {
121149b5e25fSSatish Balay         rval      = ib[j]*5;
121249b5e25fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
121349b5e25fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
121449b5e25fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
121549b5e25fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
121649b5e25fSSatish Balay         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
121749b5e25fSSatish Balay         v  += 25;
121849b5e25fSSatish Balay       }
121949b5e25fSSatish Balay       xb += 5;
122049b5e25fSSatish Balay     }
122149b5e25fSSatish Balay     break;
122249b5e25fSSatish Balay   case 6:
122349b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
122449b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
122549b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
122649b5e25fSSatish Balay       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
122749b5e25fSSatish Balay       ib = idx + ai[i];
122849b5e25fSSatish Balay       for (j=0; j<n; j++) {
122949b5e25fSSatish Balay         rval      = ib[j]*6;
123049b5e25fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
123149b5e25fSSatish Balay         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
123249b5e25fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
123349b5e25fSSatish Balay         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
123449b5e25fSSatish Balay         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
123549b5e25fSSatish Balay         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
123649b5e25fSSatish Balay         v  += 36;
123749b5e25fSSatish Balay       }
123849b5e25fSSatish Balay       xb += 6;
123949b5e25fSSatish Balay     }
124049b5e25fSSatish Balay     break;
124149b5e25fSSatish Balay   case 7:
124249b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
124349b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
124449b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
124549b5e25fSSatish Balay       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
124649b5e25fSSatish Balay       ib = idx + ai[i];
124749b5e25fSSatish Balay       for (j=0; j<n; j++) {
124849b5e25fSSatish Balay         rval      = ib[j]*7;
124949b5e25fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
125049b5e25fSSatish Balay         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
125149b5e25fSSatish Balay         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
125249b5e25fSSatish Balay         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
125349b5e25fSSatish Balay         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
125449b5e25fSSatish Balay         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
125549b5e25fSSatish Balay         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
125649b5e25fSSatish Balay         v  += 49;
125749b5e25fSSatish Balay       }
125849b5e25fSSatish Balay       xb += 7;
125949b5e25fSSatish Balay     }
126049b5e25fSSatish Balay     break;
126149b5e25fSSatish Balay   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
126249b5e25fSSatish Balay       int       ncols,k;
126349b5e25fSSatish Balay       Scalar    *work,*workt;
126449b5e25fSSatish Balay 
126549b5e25fSSatish Balay       if (!a->mult_work) {
126649b5e25fSSatish Balay         k = PetscMax(a->m,a->n);
126749b5e25fSSatish Balay         a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
126849b5e25fSSatish Balay       }
126949b5e25fSSatish Balay       work = a->mult_work;
127049b5e25fSSatish Balay       for (i=0; i<mbs; i++) {
127149b5e25fSSatish Balay         n     = ii[1] - ii[0]; ii++;
127249b5e25fSSatish Balay         ncols = n*bs;
127349b5e25fSSatish Balay         ierr  = PetscMemzero(work,ncols*sizeof(Scalar));CHKERRQ(ierr);
127449b5e25fSSatish Balay         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
127549b5e25fSSatish Balay         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
127649b5e25fSSatish Balay         v += n*bs2;
127749b5e25fSSatish Balay         x += bs;
127849b5e25fSSatish Balay         workt = work;
127949b5e25fSSatish Balay         for (j=0; j<n; j++) {
128049b5e25fSSatish Balay           zb = z + bs*(*idx++);
128149b5e25fSSatish Balay           for (k=0; k<bs; k++) zb[k] += workt[k] ;
128249b5e25fSSatish Balay           workt += bs;
128349b5e25fSSatish Balay         }
128449b5e25fSSatish Balay       }
128549b5e25fSSatish Balay     }
128649b5e25fSSatish Balay   }
128749b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
128849b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
128949b5e25fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
129049b5e25fSSatish Balay   PetscFunctionReturn(0);
129149b5e25fSSatish Balay }
129249b5e25fSSatish Balay 
129349b5e25fSSatish Balay #undef __FUNC__
129449b5e25fSSatish Balay #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ"
129549b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
129649b5e25fSSatish Balay 
129749b5e25fSSatish Balay {
129849b5e25fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
129949b5e25fSSatish Balay   Scalar          *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
130049b5e25fSSatish Balay   MatScalar       *v;
130149b5e25fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
130249b5e25fSSatish Balay 
130349b5e25fSSatish Balay   PetscFunctionBegin;
130449b5e25fSSatish Balay   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
130549b5e25fSSatish Balay   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
130649b5e25fSSatish Balay 
130749b5e25fSSatish Balay   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
130849b5e25fSSatish Balay 
130949b5e25fSSatish Balay   idx   = a->j;
131049b5e25fSSatish Balay   v     = a->a;
131149b5e25fSSatish Balay   ii    = a->i;
131249b5e25fSSatish Balay   xb    = x;
131349b5e25fSSatish Balay 
131449b5e25fSSatish Balay   switch (bs) {
131549b5e25fSSatish Balay   case 1:
131649b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
131749b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
131849b5e25fSSatish Balay       x1 = xb[0];
131949b5e25fSSatish Balay       ib = idx + ai[i];
132049b5e25fSSatish Balay       for (j=0; j<n; j++) {
132149b5e25fSSatish Balay         rval    = ib[j];
132249b5e25fSSatish Balay         z[rval] += *v * x1;
132349b5e25fSSatish Balay         v++;
132449b5e25fSSatish Balay       }
132549b5e25fSSatish Balay       xb++;
132649b5e25fSSatish Balay     }
132749b5e25fSSatish Balay     break;
132849b5e25fSSatish Balay   case 2:
132949b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
133049b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
133149b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1];
133249b5e25fSSatish Balay       ib = idx + ai[i];
133349b5e25fSSatish Balay       for (j=0; j<n; j++) {
133449b5e25fSSatish Balay         rval      = ib[j]*2;
133549b5e25fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
133649b5e25fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
133749b5e25fSSatish Balay         v  += 4;
133849b5e25fSSatish Balay       }
133949b5e25fSSatish Balay       xb += 2;
134049b5e25fSSatish Balay     }
134149b5e25fSSatish Balay     break;
134249b5e25fSSatish Balay   case 3:
134349b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
134449b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
134549b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
134649b5e25fSSatish Balay       ib = idx + ai[i];
134749b5e25fSSatish Balay       for (j=0; j<n; j++) {
134849b5e25fSSatish Balay         rval      = ib[j]*3;
134949b5e25fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
135049b5e25fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
135149b5e25fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
135249b5e25fSSatish Balay         v  += 9;
135349b5e25fSSatish Balay       }
135449b5e25fSSatish Balay       xb += 3;
135549b5e25fSSatish Balay     }
135649b5e25fSSatish Balay     break;
135749b5e25fSSatish Balay   case 4:
135849b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
135949b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
136049b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
136149b5e25fSSatish Balay       ib = idx + ai[i];
136249b5e25fSSatish Balay       for (j=0; j<n; j++) {
136349b5e25fSSatish Balay         rval      = ib[j]*4;
136449b5e25fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
136549b5e25fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
136649b5e25fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
136749b5e25fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
136849b5e25fSSatish Balay         v  += 16;
136949b5e25fSSatish Balay       }
137049b5e25fSSatish Balay       xb += 4;
137149b5e25fSSatish Balay     }
137249b5e25fSSatish Balay     break;
137349b5e25fSSatish Balay   case 5:
137449b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
137549b5e25fSSatish Balay       n  = ii[1] - ii[0]; ii++;
137649b5e25fSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
137749b5e25fSSatish Balay       x4 = xb[3]; x5 = xb[4];
137849b5e25fSSatish Balay       ib = idx + ai[i];
137949b5e25fSSatish Balay       for (j=0; j<n; j++) {
138049b5e25fSSatish Balay         rval      = ib[j]*5;
138149b5e25fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
138249b5e25fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
138349b5e25fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
138449b5e25fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
138549b5e25fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
138649b5e25fSSatish Balay         v  += 25;
138749b5e25fSSatish Balay       }
138849b5e25fSSatish Balay       xb += 5;
138949b5e25fSSatish Balay     }
139049b5e25fSSatish Balay     break;
139149b5e25fSSatish Balay   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
139249b5e25fSSatish Balay       int       ncols,k;
139349b5e25fSSatish Balay       Scalar    *work,*workt;
139449b5e25fSSatish Balay 
139549b5e25fSSatish Balay       if (!a->mult_work) {
139649b5e25fSSatish Balay         k = PetscMax(a->m,a->n);
139749b5e25fSSatish Balay         a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
139849b5e25fSSatish Balay       }
139949b5e25fSSatish Balay       work = a->mult_work;
140049b5e25fSSatish Balay       for (i=0; i<mbs; i++) {
140149b5e25fSSatish Balay         n     = ii[1] - ii[0]; ii++;
140249b5e25fSSatish Balay         ncols = n*bs;
140349b5e25fSSatish Balay         ierr  = PetscMemzero(work,ncols*sizeof(Scalar));CHKERRQ(ierr);
140449b5e25fSSatish Balay         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
140549b5e25fSSatish Balay         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
140649b5e25fSSatish Balay         v += n*bs2;
140749b5e25fSSatish Balay         x += bs;
140849b5e25fSSatish Balay         workt = work;
140949b5e25fSSatish Balay         for (j=0; j<n; j++) {
141049b5e25fSSatish Balay           zb = z + bs*(*idx++);
141149b5e25fSSatish Balay           for (k=0; k<bs; k++) zb[k] += workt[k] ;
141249b5e25fSSatish Balay           workt += bs;
141349b5e25fSSatish Balay         }
141449b5e25fSSatish Balay       }
141549b5e25fSSatish Balay     }
141649b5e25fSSatish Balay   }
141749b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
141849b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
141949b5e25fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
142049b5e25fSSatish Balay   PetscFunctionReturn(0);
142149b5e25fSSatish Balay }
142249b5e25fSSatish Balay 
142349b5e25fSSatish Balay #undef __FUNC__
142449b5e25fSSatish Balay #define __FUNC__ "MatScale_SeqSBAIJ"
142549b5e25fSSatish Balay int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA)
142649b5e25fSSatish Balay {
142749b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
142849b5e25fSSatish Balay   int         one = 1,totalnz = a->bs2*a->s_nz;
142949b5e25fSSatish Balay 
143049b5e25fSSatish Balay   PetscFunctionBegin;
143149b5e25fSSatish Balay   BLscal_(&totalnz,alpha,a->a,&one);
143249b5e25fSSatish Balay   PLogFlops(totalnz);
143349b5e25fSSatish Balay   PetscFunctionReturn(0);
143449b5e25fSSatish Balay }
143549b5e25fSSatish Balay 
143649b5e25fSSatish Balay #undef __FUNC__
143749b5e25fSSatish Balay #define __FUNC__ "MatNorm_SeqSBAIJ"
143849b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
143949b5e25fSSatish Balay {
144049b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
144149b5e25fSSatish Balay   MatScalar   *v = a->a;
144249b5e25fSSatish Balay   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1443066653e3SSatish Balay   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs;
144449b5e25fSSatish Balay   int         *jl,*il,jmin,jmax,ierr,nexti,ik;
144549b5e25fSSatish Balay 
144649b5e25fSSatish Balay   PetscFunctionBegin;
144749b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
144849b5e25fSSatish Balay     for (k=0; k<mbs; k++){
144949b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
145049b5e25fSSatish Balay       /* diagonal block */
145149b5e25fSSatish Balay       for (i=0; i<bs2; i++){
145249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
145349b5e25fSSatish Balay         sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
145449b5e25fSSatish Balay #else
145549b5e25fSSatish Balay         sum_diag += (*v)*(*v); v++;
145649b5e25fSSatish Balay #endif
145749b5e25fSSatish Balay       }
145849b5e25fSSatish Balay       for (j=jmin+1; j<jmax; j++){  /* off-diagonal blocks */
145949b5e25fSSatish Balay         for (i=0; i<bs2; i++){
146049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
146149b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
146249b5e25fSSatish Balay #else
146349b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
146449b5e25fSSatish Balay #endif
146549b5e25fSSatish Balay         }
146649b5e25fSSatish Balay       }
146749b5e25fSSatish Balay     }
146849b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
146949b5e25fSSatish Balay 
147049b5e25fSSatish Balay   }  else if (type == NORM_INFINITY) { /* maximum row sum */
147149b5e25fSSatish Balay     il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
147249b5e25fSSatish Balay     jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
1473f3dd7b2dSKris Buschelman     sum = (PetscReal*)PetscMalloc(bs*sizeof(PetscReal));CHKPTRQ(sum);
147449b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
147549b5e25fSSatish Balay       jl[i] = mbs; il[0] = 0;
147649b5e25fSSatish Balay     }
147749b5e25fSSatish Balay 
147849b5e25fSSatish Balay     *norm = 0.0;
147949b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
148049b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
148149b5e25fSSatish Balay 
148249b5e25fSSatish Balay       /*-- col sum --*/
148349b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
148449b5e25fSSatish Balay       while (i<mbs){
148549b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
148649b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
148749b5e25fSSatish Balay         for (j=0; j<bs; j++){
148849b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
148949b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
149049b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
149149b5e25fSSatish Balay           }
149249b5e25fSSatish Balay         }
149349b5e25fSSatish Balay         /* update il, jl */
149449b5e25fSSatish Balay         jmin = ik + 1; jmax = a->i[i+1];
149549b5e25fSSatish Balay         if (jmin < jmax){
149649b5e25fSSatish Balay           il[i] = jmin;
149749b5e25fSSatish Balay           j   = a->j[jmin];
149849b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
149949b5e25fSSatish Balay         }
150049b5e25fSSatish Balay         i = nexti;
150149b5e25fSSatish Balay       }
150249b5e25fSSatish Balay 
150349b5e25fSSatish Balay       /*-- row sum --*/
150449b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
150549b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
150649b5e25fSSatish Balay         for (j=0; j<bs; j++){
150749b5e25fSSatish Balay           v = a->a + i*bs2 + j;
150849b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
150949b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v);
151049b5e25fSSatish Balay             v   += bs;
151149b5e25fSSatish Balay           }
151249b5e25fSSatish Balay         }
151349b5e25fSSatish Balay       }
151449b5e25fSSatish Balay       /* add k_th block row to il, jl */
151549b5e25fSSatish Balay       jmin++;
151649b5e25fSSatish Balay       if (jmin < jmax){
151749b5e25fSSatish Balay         il[k] = jmin;
151849b5e25fSSatish Balay         j   = a->j[jmin];
151949b5e25fSSatish Balay         jl[k] = jl[j]; jl[j] = k;
152049b5e25fSSatish Balay       }
152149b5e25fSSatish Balay       for (j=0; j<bs; j++){
152249b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
152349b5e25fSSatish Balay       }
152449b5e25fSSatish Balay     }
152549b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
152649b5e25fSSatish Balay     ierr = PetscFree(jl);CHKERRQ(ierr);
152749b5e25fSSatish Balay     ierr = PetscFree(sum);CHKERRQ(ierr);
152849b5e25fSSatish Balay   } else {
152949b5e25fSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
153049b5e25fSSatish Balay   }
153149b5e25fSSatish Balay   PetscFunctionReturn(0);
153249b5e25fSSatish Balay }
153349b5e25fSSatish Balay 
153449b5e25fSSatish Balay #ifdef MatNorm_SeqBAIJ
153549b5e25fSSatish Balay /* This is modified MatNorm_SeqBAIJ.
153649b5e25fSSatish Balay    MatNorm_SeqBAIJ in baij/seq/baij2.c is not correct for bs>1 */
153749b5e25fSSatish Balay 
153849b5e25fSSatish Balay #undef __FUNC__
153949b5e25fSSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
154049b5e25fSSatish Balay int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
154149b5e25fSSatish Balay {
154249b5e25fSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
154349b5e25fSSatish Balay   MatScalar   *v = a->a;
154449b5e25fSSatish Balay   PetscReal   sum = 0.0;
154549b5e25fSSatish Balay   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
154649b5e25fSSatish Balay 
154749b5e25fSSatish Balay   PetscFunctionBegin;
154849b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
154949b5e25fSSatish Balay     for (i=0; i< bs2*nz; i++) {
155049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
155149b5e25fSSatish Balay       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
155249b5e25fSSatish Balay #else
155349b5e25fSSatish Balay       sum += (*v)*(*v); v++;
155449b5e25fSSatish Balay #endif
155549b5e25fSSatish Balay     }
155649b5e25fSSatish Balay     *norm = sqrt(sum);
155749b5e25fSSatish Balay   }  else if (type == NORM_INFINITY) { /* maximum row sum */
155849b5e25fSSatish Balay     *norm = 0.0;
155949b5e25fSSatish Balay     for (j=0; j<a->mbs; j++) {
156049b5e25fSSatish Balay       for (k=0; k<bs; k++) {
156149b5e25fSSatish Balay         v = a->a + bs2*a->i[j] + k;
156249b5e25fSSatish Balay         sum = 0.0;
156349b5e25fSSatish Balay         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
156449b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){   /* this loop was missing in the original code*/
156549b5e25fSSatish Balay             sum += PetscAbsScalar(*v);
156649b5e25fSSatish Balay             v   += bs;
156749b5e25fSSatish Balay           }
156849b5e25fSSatish Balay         }
156949b5e25fSSatish Balay         if (sum > *norm) *norm = sum;
157049b5e25fSSatish Balay       }
157149b5e25fSSatish Balay     }
157249b5e25fSSatish Balay   } else {
157349b5e25fSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
157449b5e25fSSatish Balay   }
157549b5e25fSSatish Balay   PetscFunctionReturn(0);
157649b5e25fSSatish Balay }
157749b5e25fSSatish Balay #endif
157849b5e25fSSatish Balay 
157949b5e25fSSatish Balay #undef __FUNC__
158049b5e25fSSatish Balay #define __FUNC__ "MatEqual_SeqSBAIJ"
158149b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
158249b5e25fSSatish Balay {
158349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
158449b5e25fSSatish Balay   int         ierr;
158549b5e25fSSatish Balay 
158649b5e25fSSatish Balay   PetscFunctionBegin;
158749b5e25fSSatish Balay   if (B->type !=MATSEQSBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
158849b5e25fSSatish Balay 
158949b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
159049b5e25fSSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) {
159149b5e25fSSatish Balay     *flg = PETSC_FALSE; PetscFunctionReturn(0);
159249b5e25fSSatish Balay   }
159349b5e25fSSatish Balay 
159449b5e25fSSatish Balay   /* if the a->i are the same */
159549b5e25fSSatish Balay   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
159649b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
159749b5e25fSSatish Balay     PetscFunctionReturn(0);
159849b5e25fSSatish Balay   }
159949b5e25fSSatish Balay 
160049b5e25fSSatish Balay   /* if a->j are the same */
160149b5e25fSSatish Balay   ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr);
160249b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
160349b5e25fSSatish Balay     PetscFunctionReturn(0);
160449b5e25fSSatish Balay   }
160549b5e25fSSatish Balay   /* if a->a are the same */
160649b5e25fSSatish Balay   ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr);
160749b5e25fSSatish Balay   PetscFunctionReturn(0);
160849b5e25fSSatish Balay 
160949b5e25fSSatish Balay }
161049b5e25fSSatish Balay 
161149b5e25fSSatish Balay #undef __FUNC__
161249b5e25fSSatish Balay #define __FUNC__ "MatGetDiagonal_SeqSBAIJ"
161349b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
161449b5e25fSSatish Balay {
161549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
161649b5e25fSSatish Balay   int         ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
161749b5e25fSSatish Balay   Scalar      *x,zero = 0.0;
161849b5e25fSSatish Balay   MatScalar   *aa,*aa_j;
161949b5e25fSSatish Balay 
162049b5e25fSSatish Balay   PetscFunctionBegin;
162149b5e25fSSatish Balay   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix");
162249b5e25fSSatish Balay   bs   = a->bs;
162349b5e25fSSatish Balay   aa   = a->a;
162449b5e25fSSatish Balay   ai   = a->i;
162549b5e25fSSatish Balay   aj   = a->j;
162649b5e25fSSatish Balay   ambs = a->mbs;
162749b5e25fSSatish Balay   bs2  = a->bs2;
162849b5e25fSSatish Balay 
162949b5e25fSSatish Balay   ierr = VecSet(&zero,v);CHKERRQ(ierr);
163049b5e25fSSatish Balay   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
163149b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
163249b5e25fSSatish Balay   /* if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector");*/
163349b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
163449b5e25fSSatish Balay     j=ai[i];
163549b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
163649b5e25fSSatish Balay       row  = i*bs;
163749b5e25fSSatish Balay       aa_j = aa + j*bs2;
163849b5e25fSSatish Balay       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
163949b5e25fSSatish Balay     }
164049b5e25fSSatish Balay   }
164149b5e25fSSatish Balay   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
164249b5e25fSSatish Balay   PetscFunctionReturn(0);
164349b5e25fSSatish Balay }
164449b5e25fSSatish Balay 
164549b5e25fSSatish Balay #undef __FUNC__
164649b5e25fSSatish Balay #define __FUNC__ "MatDiagonalScale_SeqSBAIJ"
164749b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
164849b5e25fSSatish Balay {
164949b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
165049b5e25fSSatish Balay   Scalar      *l,*r,x,*li,*ri;
165149b5e25fSSatish Balay   MatScalar   *aa,*v;
1652066653e3SSatish Balay   int         ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;
165349b5e25fSSatish Balay 
165449b5e25fSSatish Balay   PetscFunctionBegin;
165549b5e25fSSatish Balay   ai  = a->i;
165649b5e25fSSatish Balay   aj  = a->j;
165749b5e25fSSatish Balay   aa  = a->a;
165849b5e25fSSatish Balay   m   = a->m;
165949b5e25fSSatish Balay   bs  = a->bs;
166049b5e25fSSatish Balay   mbs = a->mbs;
166149b5e25fSSatish Balay   bs2 = a->bs2;
166249b5e25fSSatish Balay 
166349b5e25fSSatish Balay   if (ll != rr) {
166449b5e25fSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n");
166549b5e25fSSatish Balay   }
166649b5e25fSSatish Balay   if (ll) {
166749b5e25fSSatish Balay     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
166849b5e25fSSatish Balay     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
166949b5e25fSSatish Balay     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length");
167049b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
167149b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
167249b5e25fSSatish Balay       li = l + i*bs;
167349b5e25fSSatish Balay       v  = aa + bs2*ai[i];
167449b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
167549b5e25fSSatish Balay         for (k=0; k<bs2; k++) {
167649b5e25fSSatish Balay           (*v++) *= li[k%bs];
167749b5e25fSSatish Balay         }
167849b5e25fSSatish Balay #ifdef CONT
167949b5e25fSSatish Balay         /* will be used to replace the above loop */
168049b5e25fSSatish Balay         ri = l + bs*aj[ai[i]+j];
168149b5e25fSSatish Balay         for (k=0; k<bs; k++) { /* column value */
168249b5e25fSSatish Balay           x = ri[k];
168349b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
168449b5e25fSSatish Balay         }
168549b5e25fSSatish Balay #endif
168649b5e25fSSatish Balay 
168749b5e25fSSatish Balay       }
168849b5e25fSSatish Balay     }
168949b5e25fSSatish Balay     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
169049b5e25fSSatish Balay     PLogFlops(2*a->s_nz);
169149b5e25fSSatish Balay   }
169249b5e25fSSatish Balay   /* will be deleted */
169349b5e25fSSatish Balay   if (rr) {
169449b5e25fSSatish Balay     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
169549b5e25fSSatish Balay     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
169649b5e25fSSatish Balay     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length");
169749b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
169849b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
169949b5e25fSSatish Balay       v  = aa + bs2*ai[i];
170049b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
170149b5e25fSSatish Balay         ri = r + bs*aj[ai[i]+j];
170249b5e25fSSatish Balay         for (k=0; k<bs; k++) {
170349b5e25fSSatish Balay           x = ri[k];
170449b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
170549b5e25fSSatish Balay         }
170649b5e25fSSatish Balay       }
170749b5e25fSSatish Balay     }
170849b5e25fSSatish Balay     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
170949b5e25fSSatish Balay     PLogFlops(a->s_nz);
171049b5e25fSSatish Balay   }
171149b5e25fSSatish Balay   PetscFunctionReturn(0);
171249b5e25fSSatish Balay }
171349b5e25fSSatish Balay 
171449b5e25fSSatish Balay #undef __FUNC__
171549b5e25fSSatish Balay #define __FUNC__ "MatGetInfo_SeqSBAIJ"
171649b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
171749b5e25fSSatish Balay {
171849b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
171949b5e25fSSatish Balay 
172049b5e25fSSatish Balay   PetscFunctionBegin;
172149b5e25fSSatish Balay   info->rows_global    = (double)a->m;
172249b5e25fSSatish Balay   info->columns_global = (double)a->m;
172349b5e25fSSatish Balay   info->rows_local     = (double)a->m;
172449b5e25fSSatish Balay   info->columns_local  = (double)a->m;
172549b5e25fSSatish Balay   info->block_size     = a->bs2;
172649b5e25fSSatish Balay   info->nz_allocated   = a->s_maxnz; /*num. of nonzeros in upper triangular part */
172749b5e25fSSatish Balay   info->nz_used        = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */
172849b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
172949b5e25fSSatish Balay   info->assemblies   = A->num_ass;
173049b5e25fSSatish Balay   info->mallocs      = a->reallocs;
173149b5e25fSSatish Balay   info->memory       = A->mem;
173249b5e25fSSatish Balay   if (A->factor) {
173349b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
173449b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
173549b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
173649b5e25fSSatish Balay   } else {
173749b5e25fSSatish Balay     info->fill_ratio_given  = 0;
173849b5e25fSSatish Balay     info->fill_ratio_needed = 0;
173949b5e25fSSatish Balay     info->factor_mallocs    = 0;
174049b5e25fSSatish Balay   }
174149b5e25fSSatish Balay   PetscFunctionReturn(0);
174249b5e25fSSatish Balay }
174349b5e25fSSatish Balay 
174449b5e25fSSatish Balay 
174549b5e25fSSatish Balay #undef __FUNC__
174649b5e25fSSatish Balay #define __FUNC__ "MatZeroEntries_SeqSBAIJ"
174749b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A)
174849b5e25fSSatish Balay {
174949b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
175049b5e25fSSatish Balay   int         ierr;
175149b5e25fSSatish Balay 
175249b5e25fSSatish Balay   PetscFunctionBegin;
175349b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
175449b5e25fSSatish Balay   PetscFunctionReturn(0);
175549b5e25fSSatish Balay }
1756