xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision b0a32e0c6855ee6a6cd3495fa7da12ea9885bc5d)
1*b0a32e0cSBarry Smith /*$Id: sbaij2.c,v 1.25 2000/10/24 20:26:00 bsmith Exp bsmith $*/
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "petscsys.h"
449b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
549b5e25fSSatish Balay #include "src/vec/vecimpl.h"
649b5e25fSSatish Balay #include "src/inline/spops.h"
749b5e25fSSatish Balay #include "src/inline/ilu.h"
849b5e25fSSatish Balay #include "petscbt.h"
949b5e25fSSatish Balay #include "sbaij.h"
1049b5e25fSSatish Balay 
1149b5e25fSSatish Balay #undef __FUNC__
1249b5e25fSSatish Balay #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ"
1349b5e25fSSatish Balay int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov)
1449b5e25fSSatish Balay {
1549b5e25fSSatish Balay   PetscFunctionBegin;
160eaf17bfSBarry Smith   SETERRQ(1,"Function not yet written for SBAIJ format");
175535bfb1SHong Zhang   /* PetscFunctionReturn(0); */
1849b5e25fSSatish Balay }
1949b5e25fSSatish Balay 
2049b5e25fSSatish Balay #undef __FUNC__
2149b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private"
2249b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
2349b5e25fSSatish Balay {
2449b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
2549b5e25fSSatish Balay   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
2649b5e25fSSatish Balay   int          row,mat_i,*mat_j,tcol,*mat_ilen;
2749b5e25fSSatish Balay   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
2849b5e25fSSatish Balay   int          *aj = a->j,*ai = a->i;
2949b5e25fSSatish Balay   MatScalar    *mat_a;
3049b5e25fSSatish Balay   Mat          C;
3149b5e25fSSatish Balay   PetscTruth   flag;
3249b5e25fSSatish Balay 
3349b5e25fSSatish Balay   PetscFunctionBegin;
3449b5e25fSSatish Balay 
35347d480fSBarry Smith   if (isrow != iscol) SETERRA(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
3649b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
37347d480fSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
3849b5e25fSSatish Balay 
3949b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
4049b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
4149b5e25fSSatish Balay 
42*b0a32e0cSBarry Smith ierr = PetscMalloc((1+oldcols)*sizeof(int),&  smap  );CHKERRQ(ierr);
4349b5e25fSSatish Balay   ssmap = smap;
44*b0a32e0cSBarry Smith ierr = PetscMalloc((1+nrows)*sizeof(int),&  lens  );CHKERRQ(ierr);
4549b5e25fSSatish Balay   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
4649b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
4749b5e25fSSatish Balay   /* determine lens of each row */
4849b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
4949b5e25fSSatish Balay     kstart  = ai[irow[i]];
5049b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
5149b5e25fSSatish Balay     lens[i] = 0;
5249b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
5349b5e25fSSatish Balay         if (ssmap[aj[k]]) {
5449b5e25fSSatish Balay           lens[i]++;
5549b5e25fSSatish Balay         }
5649b5e25fSSatish Balay       }
5749b5e25fSSatish Balay     }
5849b5e25fSSatish Balay   /* Create and fill new matrix */
5949b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
6049b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
6149b5e25fSSatish Balay 
62347d480fSBarry Smith     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
6349b5e25fSSatish Balay     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
6449b5e25fSSatish Balay     if (flag == PETSC_FALSE) {
65347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
6649b5e25fSSatish Balay     }
6749b5e25fSSatish Balay     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
6849b5e25fSSatish Balay     C = *B;
6949b5e25fSSatish Balay   } else {
7049b5e25fSSatish Balay     ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr);
7149b5e25fSSatish Balay   }
7249b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
7349b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
7449b5e25fSSatish Balay     row    = irow[i];
7549b5e25fSSatish Balay     kstart = ai[row];
7649b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
7749b5e25fSSatish Balay     mat_i  = c->i[i];
7849b5e25fSSatish Balay     mat_j  = c->j + mat_i;
7949b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
8049b5e25fSSatish Balay     mat_ilen = c->ilen + i;
8149b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
8249b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
8349b5e25fSSatish Balay         *mat_j++ = tcol - 1;
8449b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
8549b5e25fSSatish Balay         mat_a   += bs2;
8649b5e25fSSatish Balay         (*mat_ilen)++;
8749b5e25fSSatish Balay       }
8849b5e25fSSatish Balay     }
8949b5e25fSSatish Balay   }
9049b5e25fSSatish Balay 
9149b5e25fSSatish Balay   /* Free work space */
9249b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
9349b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
9449b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9549b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9649b5e25fSSatish Balay 
9749b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
9849b5e25fSSatish Balay   *B = C;
9949b5e25fSSatish Balay   PetscFunctionReturn(0);
10049b5e25fSSatish Balay }
10149b5e25fSSatish Balay 
10249b5e25fSSatish Balay #undef __FUNC__
10349b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ"
10449b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
10549b5e25fSSatish Balay {
10649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
10749b5e25fSSatish Balay   IS          is1;
10849b5e25fSSatish Balay   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;
10949b5e25fSSatish Balay 
11049b5e25fSSatish Balay   PetscFunctionBegin;
111347d480fSBarry Smith   if (isrow != iscol) SETERRA(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
11249b5e25fSSatish Balay 
11349b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11449b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11549b5e25fSSatish Balay 
11649b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
11749b5e25fSSatish Balay    and form the IS with compressed IS */
118*b0a32e0cSBarry Smith   vary = (int*)PetscMalloc(2*(a->mbs+1)*sizeof(int));CHKERRQ(ierr);
11949b5e25fSSatish Balay   iary = vary + a->mbs;
12049b5e25fSSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
12249b5e25fSSatish Balay 
12349b5e25fSSatish Balay   count = 0;
12449b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
125347d480fSBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRA(1,"Index set does not match blocks");
12649b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
12749b5e25fSSatish Balay   }
12849b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
12949b5e25fSSatish Balay 
13049b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
13149b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
13249b5e25fSSatish Balay 
13349b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
13449b5e25fSSatish Balay   ISDestroy(is1);
13549b5e25fSSatish Balay   PetscFunctionReturn(0);
13649b5e25fSSatish Balay }
13749b5e25fSSatish Balay 
13849b5e25fSSatish Balay #undef __FUNC__
13949b5e25fSSatish Balay #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ"
14049b5e25fSSatish Balay int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B)
14149b5e25fSSatish Balay {
14249b5e25fSSatish Balay   int ierr,i;
14349b5e25fSSatish Balay 
14449b5e25fSSatish Balay   PetscFunctionBegin;
14549b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
146*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(Mat),&    *B );CHKERRQ(ierr);
14749b5e25fSSatish Balay   }
14849b5e25fSSatish Balay 
14949b5e25fSSatish Balay   for (i=0; i<n; i++) {
15049b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
15149b5e25fSSatish Balay   }
15249b5e25fSSatish Balay   PetscFunctionReturn(0);
15349b5e25fSSatish Balay }
15449b5e25fSSatish Balay 
15549b5e25fSSatish Balay /* -------------------------------------------------------*/
15649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
15749b5e25fSSatish Balay /* -------------------------------------------------------*/
158d9eff348SSatish Balay #include "petscblaslapack.h"
15949b5e25fSSatish Balay 
16049b5e25fSSatish Balay #undef __FUNC__
16149b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_1"
16249b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
16349b5e25fSSatish Balay {
16449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
16549b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,zero=0.0;
16649b5e25fSSatish Balay   MatScalar       *v;
167831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
16849b5e25fSSatish Balay 
16949b5e25fSSatish Balay   PetscFunctionBegin;
17049b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
17249b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
17349b5e25fSSatish Balay 
17449b5e25fSSatish Balay   v  = a->a;
17549b5e25fSSatish Balay   xb = x;
17649b5e25fSSatish Balay 
17749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
17849b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
17949b5e25fSSatish Balay     x1 = xb[0];
18049b5e25fSSatish Balay     ib = aj + *ai;
181831a3094SHong Zhang     jmin = 0;
182831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
183831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
184831a3094SHong Zhang       jmin++;
185831a3094SHong Zhang     }
186831a3094SHong Zhang     for (j=jmin; j<n; j++) {
18749b5e25fSSatish Balay       cval    = *ib;
18849b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
18949b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
19049b5e25fSSatish Balay     }
19149b5e25fSSatish Balay     xb++; ai++;
19249b5e25fSSatish Balay   }
19349b5e25fSSatish Balay 
19449b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
19549b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
196*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->s_nz*2 - A->m) - A->m);  /* s_nz = (nz+m)/2 */
19749b5e25fSSatish Balay   PetscFunctionReturn(0);
19849b5e25fSSatish Balay }
19949b5e25fSSatish Balay 
20049b5e25fSSatish Balay #undef __FUNC__
20149b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_2"
20249b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
20349b5e25fSSatish Balay {
20449b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
20549b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,zero=0.0;
20649b5e25fSSatish Balay   MatScalar       *v;
207831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
20849b5e25fSSatish Balay 
20949b5e25fSSatish Balay 
21049b5e25fSSatish Balay   PetscFunctionBegin;
21149b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
21249b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
21349b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
21449b5e25fSSatish Balay 
21549b5e25fSSatish Balay   v     = a->a;
21649b5e25fSSatish Balay   xb = x;
21749b5e25fSSatish Balay 
21849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
21949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
22049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
22149b5e25fSSatish Balay     ib = aj + *ai;
222831a3094SHong Zhang     jmin = 0;
2237fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
22449b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
22549b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
226831a3094SHong Zhang       v += 4; jmin++;
2277fbae186SHong Zhang     }
228831a3094SHong Zhang     for (j=jmin; j<n; j++) {
22949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
23049b5e25fSSatish Balay       cval       = ib[j]*2;
23149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
23249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
23349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
23449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
23549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
23649b5e25fSSatish Balay       v  += 4;
23749b5e25fSSatish Balay     }
23849b5e25fSSatish Balay     xb +=2; ai++;
23949b5e25fSSatish Balay   }
24049b5e25fSSatish Balay 
24149b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
24249b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
243*b0a32e0cSBarry Smith   PetscLogFlops(8*(a->s_nz*2 - A->m) - A->m);
24449b5e25fSSatish Balay   PetscFunctionReturn(0);
24549b5e25fSSatish Balay }
24649b5e25fSSatish Balay 
24749b5e25fSSatish Balay #undef __FUNC__
24849b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_3"
24949b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
25049b5e25fSSatish Balay {
25149b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
25249b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,zero=0.0;
25349b5e25fSSatish Balay   MatScalar       *v;
254831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
25549b5e25fSSatish Balay 
25649b5e25fSSatish Balay 
25749b5e25fSSatish Balay   PetscFunctionBegin;
25849b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
25949b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
26049b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
26149b5e25fSSatish Balay 
26249b5e25fSSatish Balay   v     = a->a;
26349b5e25fSSatish Balay   xb = x;
26449b5e25fSSatish Balay 
26549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
26649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
26749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
26849b5e25fSSatish Balay     ib = aj + *ai;
269831a3094SHong Zhang     jmin = 0;
2707fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
27149b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
27249b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
27349b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
274831a3094SHong Zhang       v += 9; jmin++;
2757fbae186SHong Zhang     }
276831a3094SHong Zhang     for (j=jmin; j<n; j++) {
27749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
27849b5e25fSSatish Balay       cval       = ib[j]*3;
27949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
28049b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
28149b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
28249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
28349b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
28449b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
28549b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
28649b5e25fSSatish Balay       v  += 9;
28749b5e25fSSatish Balay     }
28849b5e25fSSatish Balay     xb +=3; ai++;
28949b5e25fSSatish Balay   }
29049b5e25fSSatish Balay 
29149b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
29249b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
293*b0a32e0cSBarry Smith   PetscLogFlops(18*(a->s_nz*2 - A->m) - A->m);
29449b5e25fSSatish Balay   PetscFunctionReturn(0);
29549b5e25fSSatish Balay }
29649b5e25fSSatish Balay 
29749b5e25fSSatish Balay #undef __FUNC__
29849b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_4"
29949b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
30049b5e25fSSatish Balay {
30149b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
30249b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
30349b5e25fSSatish Balay   MatScalar       *v;
304831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
30549b5e25fSSatish Balay 
30649b5e25fSSatish Balay   PetscFunctionBegin;
30749b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
30849b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
30949b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
31049b5e25fSSatish Balay 
31149b5e25fSSatish Balay   v     = a->a;
31249b5e25fSSatish Balay   xb = x;
31349b5e25fSSatish Balay 
31449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
31549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
31649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
31749b5e25fSSatish Balay     ib = aj + *ai;
318831a3094SHong Zhang     jmin = 0;
3197fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
32049b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
32149b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
32249b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
32349b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
324831a3094SHong Zhang       v += 16; jmin++;
3257fbae186SHong Zhang     }
326831a3094SHong Zhang     for (j=jmin; j<n; j++) {
32749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
32849b5e25fSSatish Balay       cval       = ib[j]*4;
32949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
33049b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
33149b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
33249b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
33349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
33449b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
33549b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
33649b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
33749b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
33849b5e25fSSatish Balay       v  += 16;
33949b5e25fSSatish Balay     }
34049b5e25fSSatish Balay     xb +=4; ai++;
34149b5e25fSSatish Balay   }
34249b5e25fSSatish Balay 
34349b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
34449b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
345*b0a32e0cSBarry Smith   PetscLogFlops(32*(a->s_nz*2 - A->m) - A->m);
34649b5e25fSSatish Balay   PetscFunctionReturn(0);
34749b5e25fSSatish Balay }
34849b5e25fSSatish Balay 
34949b5e25fSSatish Balay #undef __FUNC__
35049b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_5"
35149b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
35249b5e25fSSatish Balay {
35349b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
35449b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
35549b5e25fSSatish Balay   MatScalar       *v;
356831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
35749b5e25fSSatish Balay 
35849b5e25fSSatish Balay   PetscFunctionBegin;
35949b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
36049b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
36149b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
36249b5e25fSSatish Balay 
36349b5e25fSSatish Balay   v     = a->a;
36449b5e25fSSatish Balay   xb = x;
36549b5e25fSSatish Balay 
36649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
36749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
36849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
36949b5e25fSSatish Balay     ib = aj + *ai;
370831a3094SHong Zhang     jmin = 0;
3717fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
37249b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
37349b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
37449b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
37549b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
37649b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
377831a3094SHong Zhang       v += 25; jmin++;
3787fbae186SHong Zhang     }
379831a3094SHong Zhang     for (j=jmin; j<n; j++) {
38049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
38149b5e25fSSatish Balay       cval       = ib[j]*5;
38249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
38349b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
38449b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
38549b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
38649b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
38749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
38849b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
38949b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
39049b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
39149b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
39249b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
39349b5e25fSSatish Balay       v  += 25;
39449b5e25fSSatish Balay     }
39549b5e25fSSatish Balay     xb +=5; ai++;
39649b5e25fSSatish Balay   }
39749b5e25fSSatish Balay 
39849b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
39949b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
400*b0a32e0cSBarry Smith   PetscLogFlops(50*(a->s_nz*2 - A->m) - A->m);
40149b5e25fSSatish Balay   PetscFunctionReturn(0);
40249b5e25fSSatish Balay }
40349b5e25fSSatish Balay 
40449b5e25fSSatish Balay 
40549b5e25fSSatish Balay #undef __FUNC__
40649b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_6"
40749b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
40849b5e25fSSatish Balay {
40949b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
41049b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
41149b5e25fSSatish Balay   MatScalar       *v;
412831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
41349b5e25fSSatish Balay 
41449b5e25fSSatish Balay   PetscFunctionBegin;
41549b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
41649b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
41749b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
41849b5e25fSSatish Balay 
41949b5e25fSSatish Balay   v     = a->a;
42049b5e25fSSatish Balay   xb = x;
42149b5e25fSSatish Balay 
42249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
42349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
42449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
42549b5e25fSSatish Balay     ib = aj + *ai;
426831a3094SHong Zhang     jmin = 0;
4277fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
42849b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
42949b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
43049b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
43149b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
43249b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
43349b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
434831a3094SHong Zhang       v += 36; jmin++;
4357fbae186SHong Zhang     }
436831a3094SHong Zhang     for (j=jmin; j<n; j++) {
43749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
43849b5e25fSSatish Balay       cval       = ib[j]*6;
43949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
44049b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
44149b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
44249b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
44349b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
44449b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
44549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
44649b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
44749b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
44849b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
44949b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
45049b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
45149b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
45249b5e25fSSatish Balay       v  += 36;
45349b5e25fSSatish Balay     }
45449b5e25fSSatish Balay     xb +=6; ai++;
45549b5e25fSSatish Balay   }
45649b5e25fSSatish Balay 
45749b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
45849b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
459*b0a32e0cSBarry Smith   PetscLogFlops(72*(a->s_nz*2 - A->m) - A->m);
46049b5e25fSSatish Balay   PetscFunctionReturn(0);
46149b5e25fSSatish Balay }
46249b5e25fSSatish Balay #undef __FUNC__
46349b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_7"
46449b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
46549b5e25fSSatish Balay {
46649b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
46749b5e25fSSatish Balay   Scalar          *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
46849b5e25fSSatish Balay   MatScalar       *v;
469831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
47049b5e25fSSatish Balay 
47149b5e25fSSatish Balay   PetscFunctionBegin;
47249b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
47349b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
47449b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
47549b5e25fSSatish Balay 
47649b5e25fSSatish Balay   v     = a->a;
47749b5e25fSSatish Balay   xb = x;
47849b5e25fSSatish Balay 
47949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
48049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
48149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
48249b5e25fSSatish Balay     ib = aj + *ai;
483831a3094SHong Zhang     jmin = 0;
4847fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
48549b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
48649b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
48749b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
48849b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
48949b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
49049b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
49149b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
492831a3094SHong Zhang       v += 49; jmin++;
4937fbae186SHong Zhang     }
494831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49649b5e25fSSatish Balay       cval       = ib[j]*7;
49749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
49849b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
49949b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
50049b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
50149b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
50249b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
50349b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
50449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50549b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
50649b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
50749b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
50849b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
50949b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
51049b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
51149b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
51249b5e25fSSatish Balay       v  += 49;
51349b5e25fSSatish Balay     }
51449b5e25fSSatish Balay     xb +=7; ai++;
51549b5e25fSSatish Balay   }
51649b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
51749b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
518*b0a32e0cSBarry Smith   PetscLogFlops(98*(a->s_nz*2 - A->m) - A->m);
51949b5e25fSSatish Balay   PetscFunctionReturn(0);
52049b5e25fSSatish Balay }
52149b5e25fSSatish Balay 
52249b5e25fSSatish Balay /*
52349b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
52449b5e25fSSatish Balay */
52549b5e25fSSatish Balay #undef __FUNC__
52649b5e25fSSatish Balay #define __FUNC__ "MatMult_SeqSBAIJ_N"
52749b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
52849b5e25fSSatish Balay {
52949b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
530066653e3SSatish Balay   Scalar          *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5310b60a74dSHong Zhang   MatScalar       *v;
532066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
5330b60a74dSHong Zhang   int             ncols,k;
53449b5e25fSSatish Balay 
53549b5e25fSSatish Balay   PetscFunctionBegin;
53649b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
53749b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
53849b5e25fSSatish Balay   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
53949b5e25fSSatish Balay 
54049b5e25fSSatish Balay   aj   = a->j;
54149b5e25fSSatish Balay   v    = a->a;
54249b5e25fSSatish Balay   ii   = a->i;
54349b5e25fSSatish Balay 
54449b5e25fSSatish Balay   if (!a->mult_work) {
545*b0a32e0cSBarry Smith     a->mult_work = (Scalar*)PetscMalloc((A->m+1)*sizeof(Scalar));CHKERRQ(ierr);
54649b5e25fSSatish Balay   }
54749b5e25fSSatish Balay   work = a->mult_work;
54849b5e25fSSatish Balay 
54949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
55049b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
55149b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
55249b5e25fSSatish Balay 
55349b5e25fSSatish Balay     /* upper triangular part */
55449b5e25fSSatish Balay     for (j=0; j<n; j++) {
55549b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
55649b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
55749b5e25fSSatish Balay       workt += bs;
55849b5e25fSSatish Balay     }
55949b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
56049b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
56149b5e25fSSatish Balay 
56249b5e25fSSatish Balay     /* strict lower triangular part */
563831a3094SHong Zhang     idx = aj+ii[0];
564831a3094SHong Zhang     if (*idx == i){
56596b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
566831a3094SHong Zhang     }
56796b9376eSHong Zhang 
56849b5e25fSSatish Balay     if (ncols > 0){
56949b5e25fSSatish Balay       workt = work;
57049b5e25fSSatish Balay       ierr  = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr);
571831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
572831a3094SHong Zhang       for (j=0; j<n; j++) {
573831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
57449b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
57549b5e25fSSatish Balay         workt += bs;
57649b5e25fSSatish Balay       }
57749b5e25fSSatish Balay     }
57849b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
57949b5e25fSSatish Balay   }
58049b5e25fSSatish Balay 
58149b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
58249b5e25fSSatish Balay   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
583*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->s_nz*2 - A->m)*bs2 - A->m);
58449b5e25fSSatish Balay   PetscFunctionReturn(0);
58549b5e25fSSatish Balay }
58649b5e25fSSatish Balay 
58749b5e25fSSatish Balay #undef __FUNC__
58849b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_1"
58949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
59049b5e25fSSatish Balay {
59149b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
59249b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1;
59349b5e25fSSatish Balay   MatScalar       *v;
594831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
59549b5e25fSSatish Balay 
59649b5e25fSSatish Balay   PetscFunctionBegin;
59749b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
59849b5e25fSSatish Balay   if (yy != xx) {
59949b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
60049b5e25fSSatish Balay   } else {
60149b5e25fSSatish Balay     y = x;
60249b5e25fSSatish Balay   }
60349b5e25fSSatish Balay   if (zz != yy) {
60449b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
60549b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
60649b5e25fSSatish Balay   } else {
60749b5e25fSSatish Balay     z = y;
60849b5e25fSSatish Balay   }
60949b5e25fSSatish Balay 
61049b5e25fSSatish Balay   v  = a->a;
61149b5e25fSSatish Balay   xb = x;
61249b5e25fSSatish Balay 
61349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61449b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
61549b5e25fSSatish Balay     x1 = xb[0];
61649b5e25fSSatish Balay     ib = aj + *ai;
617831a3094SHong Zhang     jmin = 0;
618831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
619831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
620831a3094SHong Zhang     }
621831a3094SHong Zhang     for (j=jmin; j<n; j++) {
62249b5e25fSSatish Balay       cval    = *ib;
62349b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
62449b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
62549b5e25fSSatish Balay     }
62649b5e25fSSatish Balay     xb++; ai++;
62749b5e25fSSatish Balay   }
62849b5e25fSSatish Balay 
62949b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
63049b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
63149b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
63249b5e25fSSatish Balay 
633*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->s_nz*2 - A->m));
63449b5e25fSSatish Balay   PetscFunctionReturn(0);
63549b5e25fSSatish Balay }
63649b5e25fSSatish Balay 
63749b5e25fSSatish Balay #undef __FUNC__
63849b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_2"
63949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
64049b5e25fSSatish Balay {
64149b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
64249b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2;
64349b5e25fSSatish Balay   MatScalar       *v;
644831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
64549b5e25fSSatish Balay 
64649b5e25fSSatish Balay   PetscFunctionBegin;
64749b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
64849b5e25fSSatish Balay   if (yy != xx) {
64949b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
65049b5e25fSSatish Balay   } else {
65149b5e25fSSatish Balay     y = x;
65249b5e25fSSatish Balay   }
65349b5e25fSSatish Balay   if (zz != yy) {
65449b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
65549b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
65649b5e25fSSatish Balay   } else {
65749b5e25fSSatish Balay     z = y;
65849b5e25fSSatish Balay   }
65949b5e25fSSatish Balay 
66049b5e25fSSatish Balay   v     = a->a;
66149b5e25fSSatish Balay   xb = x;
66249b5e25fSSatish Balay 
66349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
66449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
66549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
66649b5e25fSSatish Balay     ib = aj + *ai;
667831a3094SHong Zhang     jmin = 0;
6687fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
66949b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
67049b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
671831a3094SHong Zhang       v += 4; jmin++;
6727fbae186SHong Zhang     }
673831a3094SHong Zhang     for (j=jmin; j<n; j++) {
67449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
67549b5e25fSSatish Balay       cval       = ib[j]*2;
67649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
67749b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
67849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
67949b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
68049b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
68149b5e25fSSatish Balay       v  += 4;
68249b5e25fSSatish Balay     }
68349b5e25fSSatish Balay     xb +=2; ai++;
68449b5e25fSSatish Balay   }
68549b5e25fSSatish Balay 
68649b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
68749b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
68849b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68949b5e25fSSatish Balay 
690*b0a32e0cSBarry Smith   PetscLogFlops(4*(a->s_nz*2 - A->m));
69149b5e25fSSatish Balay   PetscFunctionReturn(0);
69249b5e25fSSatish Balay }
69349b5e25fSSatish Balay 
69449b5e25fSSatish Balay #undef __FUNC__
69549b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_3"
69649b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
69749b5e25fSSatish Balay {
69849b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
69949b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3;
70049b5e25fSSatish Balay   MatScalar       *v;
701831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
70249b5e25fSSatish Balay 
70349b5e25fSSatish Balay   PetscFunctionBegin;
70449b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
70549b5e25fSSatish Balay   if (yy != xx) {
70649b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
70749b5e25fSSatish Balay   } else {
70849b5e25fSSatish Balay     y = x;
70949b5e25fSSatish Balay   }
71049b5e25fSSatish Balay   if (zz != yy) {
71149b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
71249b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
71349b5e25fSSatish Balay   } else {
71449b5e25fSSatish Balay     z = y;
71549b5e25fSSatish Balay   }
71649b5e25fSSatish Balay 
71749b5e25fSSatish Balay   v     = a->a;
71849b5e25fSSatish Balay   xb = x;
71949b5e25fSSatish Balay 
72049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
72149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
72249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
72349b5e25fSSatish Balay     ib = aj + *ai;
724831a3094SHong Zhang     jmin = 0;
7257fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
72649b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
72749b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
72849b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
729831a3094SHong Zhang      v += 9; jmin++;
7307fbae186SHong Zhang     }
731831a3094SHong Zhang     for (j=jmin; j<n; j++) {
73249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
73349b5e25fSSatish Balay       cval       = ib[j]*3;
73449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
73549b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
73649b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
73749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
73849b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
73949b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
74049b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
74149b5e25fSSatish Balay       v  += 9;
74249b5e25fSSatish Balay     }
74349b5e25fSSatish Balay     xb +=3; ai++;
74449b5e25fSSatish Balay   }
74549b5e25fSSatish Balay 
74649b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
74749b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
74849b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
74949b5e25fSSatish Balay 
750*b0a32e0cSBarry Smith   PetscLogFlops(18*(a->s_nz*2 - A->m));
75149b5e25fSSatish Balay   PetscFunctionReturn(0);
75249b5e25fSSatish Balay }
75349b5e25fSSatish Balay 
75449b5e25fSSatish Balay #undef __FUNC__
75549b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_4"
75649b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
75749b5e25fSSatish Balay {
75849b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
75949b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4;
76049b5e25fSSatish Balay   MatScalar       *v;
761831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
76249b5e25fSSatish Balay 
76349b5e25fSSatish Balay   PetscFunctionBegin;
76449b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
76549b5e25fSSatish Balay   if (yy != xx) {
76649b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
76749b5e25fSSatish Balay   } else {
76849b5e25fSSatish Balay     y = x;
76949b5e25fSSatish Balay   }
77049b5e25fSSatish Balay   if (zz != yy) {
77149b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
77249b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
77349b5e25fSSatish Balay   } else {
77449b5e25fSSatish Balay     z = y;
77549b5e25fSSatish Balay   }
77649b5e25fSSatish Balay 
77749b5e25fSSatish Balay   v     = a->a;
77849b5e25fSSatish Balay   xb = x;
77949b5e25fSSatish Balay 
78049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
78149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
78249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
78349b5e25fSSatish Balay     ib = aj + *ai;
784831a3094SHong Zhang     jmin = 0;
7857fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
78649b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
78749b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
78849b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
78949b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
790831a3094SHong Zhang       v += 16; jmin++;
7917fbae186SHong Zhang     }
792831a3094SHong Zhang     for (j=jmin; j<n; j++) {
79349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
79449b5e25fSSatish Balay       cval       = ib[j]*4;
79549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
79649b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
79749b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
79849b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
79949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
80049b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
80149b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
80249b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
80349b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
80449b5e25fSSatish Balay       v  += 16;
80549b5e25fSSatish Balay     }
80649b5e25fSSatish Balay     xb +=4; ai++;
80749b5e25fSSatish Balay   }
80849b5e25fSSatish Balay 
80949b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
81049b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
81149b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
81249b5e25fSSatish Balay 
813*b0a32e0cSBarry Smith   PetscLogFlops(32*(a->s_nz*2 - A->m));
81449b5e25fSSatish Balay   PetscFunctionReturn(0);
81549b5e25fSSatish Balay }
81649b5e25fSSatish Balay 
81749b5e25fSSatish Balay #undef __FUNC__
81849b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_5"
81949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
82049b5e25fSSatish Balay {
82149b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
82249b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5;
82349b5e25fSSatish Balay   MatScalar       *v;
824831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
82549b5e25fSSatish Balay 
82649b5e25fSSatish Balay   PetscFunctionBegin;
82749b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
82849b5e25fSSatish Balay   if (yy != xx) {
82949b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
83049b5e25fSSatish Balay   } else {
83149b5e25fSSatish Balay     y = x;
83249b5e25fSSatish Balay   }
83349b5e25fSSatish Balay   if (zz != yy) {
83449b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
83549b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
83649b5e25fSSatish Balay   } else {
83749b5e25fSSatish Balay     z = y;
83849b5e25fSSatish Balay   }
83949b5e25fSSatish Balay 
84049b5e25fSSatish Balay   v     = a->a;
84149b5e25fSSatish Balay   xb = x;
84249b5e25fSSatish Balay 
84349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
84449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
84549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
84649b5e25fSSatish Balay     ib = aj + *ai;
847831a3094SHong Zhang     jmin = 0;
8487fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
84949b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
85049b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
85149b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
85249b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
85349b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
854831a3094SHong Zhang       v += 25; jmin++;
8557fbae186SHong Zhang     }
856831a3094SHong Zhang     for (j=jmin; j<n; j++) {
85749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
85849b5e25fSSatish Balay       cval       = ib[j]*5;
85949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
86049b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
86149b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
86249b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
86349b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
86449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
86549b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
86649b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
86749b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
86849b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
86949b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
87049b5e25fSSatish Balay       v  += 25;
87149b5e25fSSatish Balay     }
87249b5e25fSSatish Balay     xb +=5; ai++;
87349b5e25fSSatish Balay   }
87449b5e25fSSatish Balay 
87549b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
87649b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
87749b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
87849b5e25fSSatish Balay 
879*b0a32e0cSBarry Smith   PetscLogFlops(50*(a->s_nz*2 - A->m));
88049b5e25fSSatish Balay   PetscFunctionReturn(0);
88149b5e25fSSatish Balay }
88249b5e25fSSatish Balay #undef __FUNC__
88349b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_6"
88449b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
88549b5e25fSSatish Balay {
88649b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
88749b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
88849b5e25fSSatish Balay   MatScalar       *v;
889831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
89049b5e25fSSatish Balay 
89149b5e25fSSatish Balay   PetscFunctionBegin;
89249b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
89349b5e25fSSatish Balay   if (yy != xx) {
89449b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
89549b5e25fSSatish Balay   } else {
89649b5e25fSSatish Balay     y = x;
89749b5e25fSSatish Balay   }
89849b5e25fSSatish Balay   if (zz != yy) {
89949b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
90049b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
90149b5e25fSSatish Balay   } else {
90249b5e25fSSatish Balay     z = y;
90349b5e25fSSatish Balay   }
90449b5e25fSSatish Balay 
90549b5e25fSSatish Balay   v     = a->a;
90649b5e25fSSatish Balay   xb = x;
90749b5e25fSSatish Balay 
90849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
90949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
91049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
91149b5e25fSSatish Balay     ib = aj + *ai;
912831a3094SHong Zhang     jmin = 0;
9137fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
91449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
91549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
91649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
91749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
91849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
91949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
920831a3094SHong Zhang       v += 36; jmin++;
9217fbae186SHong Zhang     }
922831a3094SHong Zhang     for (j=jmin; j<n; j++) {
92349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
92449b5e25fSSatish Balay       cval       = ib[j]*6;
92549b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
92649b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
92749b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
92849b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
92949b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
93049b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
93149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
93249b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
93349b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
93449b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
93549b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
93649b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
93749b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
93849b5e25fSSatish Balay       v  += 36;
93949b5e25fSSatish Balay     }
94049b5e25fSSatish Balay     xb +=6; ai++;
94149b5e25fSSatish Balay   }
94249b5e25fSSatish Balay 
94349b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
94449b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
94549b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
94649b5e25fSSatish Balay 
947*b0a32e0cSBarry Smith   PetscLogFlops(72*(a->s_nz*2 - A->m));
94849b5e25fSSatish Balay   PetscFunctionReturn(0);
94949b5e25fSSatish Balay }
95049b5e25fSSatish Balay 
95149b5e25fSSatish Balay #undef __FUNC__
95249b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_7"
95349b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
95449b5e25fSSatish Balay {
95549b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
95649b5e25fSSatish Balay   Scalar          *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
95749b5e25fSSatish Balay   MatScalar       *v;
958831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
95949b5e25fSSatish Balay 
96049b5e25fSSatish Balay   PetscFunctionBegin;
96149b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
96249b5e25fSSatish Balay   if (yy != xx) {
96349b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
96449b5e25fSSatish Balay   } else {
96549b5e25fSSatish Balay     y = x;
96649b5e25fSSatish Balay   }
96749b5e25fSSatish Balay   if (zz != yy) {
96849b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
96949b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
97049b5e25fSSatish Balay   } else {
97149b5e25fSSatish Balay     z = y;
97249b5e25fSSatish Balay   }
97349b5e25fSSatish Balay 
97449b5e25fSSatish Balay   v     = a->a;
97549b5e25fSSatish Balay   xb = x;
97649b5e25fSSatish Balay 
97749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
97849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
97949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
98049b5e25fSSatish Balay     ib = aj + *ai;
981831a3094SHong Zhang     jmin = 0;
9827fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
98349b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
98449b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
98549b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
98649b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
98749b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
98849b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
98949b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
990831a3094SHong Zhang       v += 49; jmin++;
9917fbae186SHong Zhang     }
992831a3094SHong Zhang     for (j=jmin; j<n; j++) {
99349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
99449b5e25fSSatish Balay       cval       = ib[j]*7;
99549b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
99649b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
99749b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
99849b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
99949b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
100049b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
100149b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
100249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
100349b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
100449b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
100549b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
100649b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
100749b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
100849b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
100949b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
101049b5e25fSSatish Balay       v  += 49;
101149b5e25fSSatish Balay     }
101249b5e25fSSatish Balay     xb +=7; ai++;
101349b5e25fSSatish Balay   }
101449b5e25fSSatish Balay 
101549b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
101649b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
101749b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
101849b5e25fSSatish Balay 
1019*b0a32e0cSBarry Smith   PetscLogFlops(98*(a->s_nz*2 - A->m));
102049b5e25fSSatish Balay   PetscFunctionReturn(0);
102149b5e25fSSatish Balay }
102249b5e25fSSatish Balay 
102349b5e25fSSatish Balay #undef __FUNC__
102449b5e25fSSatish Balay #define __FUNC__ "MatMultAdd_SeqSBAIJ_N"
102549b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
102649b5e25fSSatish Balay {
102749b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
1028ecfa4421SSatish Balay   Scalar          *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1029066653e3SSatish Balay   MatScalar       *v;
1030066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1031066653e3SSatish Balay   int             ncols,k;
103249b5e25fSSatish Balay 
103349b5e25fSSatish Balay   PetscFunctionBegin;
103449b5e25fSSatish Balay   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
103549b5e25fSSatish Balay   if (yy != xx) {
103649b5e25fSSatish Balay     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
103749b5e25fSSatish Balay   } else {
103849b5e25fSSatish Balay     y = x;
103949b5e25fSSatish Balay   }
104049b5e25fSSatish Balay   if (zz != yy) {
104149b5e25fSSatish Balay     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
104249b5e25fSSatish Balay     ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
104349b5e25fSSatish Balay   } else {
104449b5e25fSSatish Balay     z = y;
104549b5e25fSSatish Balay   }
104649b5e25fSSatish Balay 
104749b5e25fSSatish Balay   aj   = a->j;
104849b5e25fSSatish Balay   v    = a->a;
104949b5e25fSSatish Balay   ii   = a->i;
105049b5e25fSSatish Balay 
105149b5e25fSSatish Balay   if (!a->mult_work) {
1052*b0a32e0cSBarry Smith     a->mult_work = (Scalar*)PetscMalloc((A->m+1)*sizeof(Scalar));CHKERRQ(ierr);
105349b5e25fSSatish Balay   }
105449b5e25fSSatish Balay   work = a->mult_work;
105549b5e25fSSatish Balay 
105649b5e25fSSatish Balay 
105749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
105849b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
105949b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
106049b5e25fSSatish Balay 
106149b5e25fSSatish Balay     /* upper triangular part */
106249b5e25fSSatish Balay     for (j=0; j<n; j++) {
106349b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
106449b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
106549b5e25fSSatish Balay       workt += bs;
106649b5e25fSSatish Balay     }
106749b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
106849b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
106949b5e25fSSatish Balay 
107049b5e25fSSatish Balay     /* strict lower triangular part */
1071831a3094SHong Zhang     idx = aj+ii[0];
1072831a3094SHong Zhang     if (*idx == i){
107396b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1074831a3094SHong Zhang     }
107549b5e25fSSatish Balay     if (ncols > 0){
107649b5e25fSSatish Balay       workt = work;
107749b5e25fSSatish Balay       ierr  = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr);
1078831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1079831a3094SHong Zhang       for (j=0; j<n; j++) {
1080831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
1081831a3094SHong Zhang         /* idx++; */
108249b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
108349b5e25fSSatish Balay         workt += bs;
108449b5e25fSSatish Balay       }
108549b5e25fSSatish Balay     }
108649b5e25fSSatish Balay 
108749b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
108849b5e25fSSatish Balay   }
108949b5e25fSSatish Balay 
109049b5e25fSSatish Balay   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
109149b5e25fSSatish Balay   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
109249b5e25fSSatish Balay   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
109349b5e25fSSatish Balay 
1094*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->s_nz*2 - A->m));
109549b5e25fSSatish Balay   PetscFunctionReturn(0);
109649b5e25fSSatish Balay }
109749b5e25fSSatish Balay 
109849b5e25fSSatish Balay #undef __FUNC__
109949b5e25fSSatish Balay #define __FUNC__ "MatMultTranspose_SeqSBAIJ"
110049b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
110149b5e25fSSatish Balay {
110249b5e25fSSatish Balay   PetscFunctionBegin;
11030eaf17bfSBarry Smith   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
11045535bfb1SHong Zhang   /* PetscFunctionReturn(0); */
110549b5e25fSSatish Balay }
110649b5e25fSSatish Balay 
110749b5e25fSSatish Balay #undef __FUNC__
110849b5e25fSSatish Balay #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ"
110949b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
111049b5e25fSSatish Balay 
111149b5e25fSSatish Balay {
111249b5e25fSSatish Balay   PetscFunctionBegin;
11130eaf17bfSBarry Smith   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
11145535bfb1SHong Zhang   /* PetscFunctionReturn(0); */
111549b5e25fSSatish Balay }
111649b5e25fSSatish Balay 
111749b5e25fSSatish Balay #undef __FUNC__
111849b5e25fSSatish Balay #define __FUNC__ "MatScale_SeqSBAIJ"
111949b5e25fSSatish Balay int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA)
112049b5e25fSSatish Balay {
112149b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
112249b5e25fSSatish Balay   int         one = 1,totalnz = a->bs2*a->s_nz;
112349b5e25fSSatish Balay 
112449b5e25fSSatish Balay   PetscFunctionBegin;
112549b5e25fSSatish Balay   BLscal_(&totalnz,alpha,a->a,&one);
1126*b0a32e0cSBarry Smith   PetscLogFlops(totalnz);
112749b5e25fSSatish Balay   PetscFunctionReturn(0);
112849b5e25fSSatish Balay }
112949b5e25fSSatish Balay 
113049b5e25fSSatish Balay #undef __FUNC__
113149b5e25fSSatish Balay #define __FUNC__ "MatNorm_SeqSBAIJ"
113249b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
113349b5e25fSSatish Balay {
113449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
113549b5e25fSSatish Balay   MatScalar   *v = a->a;
113649b5e25fSSatish Balay   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1137831a3094SHong Zhang   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1138831a3094SHong Zhang   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
113949b5e25fSSatish Balay 
114049b5e25fSSatish Balay   PetscFunctionBegin;
114149b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
114249b5e25fSSatish Balay     for (k=0; k<mbs; k++){
114349b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1144831a3094SHong Zhang       col  = aj + jmin;
1145831a3094SHong Zhang       if (*col == k){         /* diagonal block */
114649b5e25fSSatish Balay         for (i=0; i<bs2; i++){
114749b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
114849b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
114949b5e25fSSatish Balay #else
115049b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
115149b5e25fSSatish Balay #endif
115249b5e25fSSatish Balay         }
1153831a3094SHong Zhang         jmin++;
1154831a3094SHong Zhang       }
1155831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
115649b5e25fSSatish Balay         for (i=0; i<bs2; i++){
115749b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
115849b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
115949b5e25fSSatish Balay #else
116049b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
116149b5e25fSSatish Balay #endif
116249b5e25fSSatish Balay         }
116349b5e25fSSatish Balay       }
116449b5e25fSSatish Balay     }
116549b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
116649b5e25fSSatish Balay 
116749b5e25fSSatish Balay   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1168*b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&(    il ));CHKERRQ(ierr);
1169*b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&(    jl ));CHKERRQ(ierr);
1170*b0a32e0cSBarry Smith ierr = PetscMalloc(bs*sizeof(PetscReal),&(    sum ));CHKERRQ(ierr);
117149b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
117249b5e25fSSatish Balay       jl[i] = mbs; il[0] = 0;
117349b5e25fSSatish Balay     }
117449b5e25fSSatish Balay 
117549b5e25fSSatish Balay     *norm = 0.0;
117649b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
117749b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
117849b5e25fSSatish Balay 
117949b5e25fSSatish Balay       /*-- col sum --*/
118049b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1181831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1182831a3094SHong Zhang                   at step k */
118349b5e25fSSatish Balay       while (i<mbs){
118449b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
118549b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
118649b5e25fSSatish Balay         for (j=0; j<bs; j++){
118749b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
118849b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
118949b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
119049b5e25fSSatish Balay           }
119149b5e25fSSatish Balay         }
119249b5e25fSSatish Balay         /* update il, jl */
1193831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1194831a3094SHong Zhang         jmax = a->i[i+1];
119549b5e25fSSatish Balay         if (jmin < jmax){
119649b5e25fSSatish Balay           il[i] = jmin;
119749b5e25fSSatish Balay           j   = a->j[jmin];
119849b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
119949b5e25fSSatish Balay         }
120049b5e25fSSatish Balay         i = nexti;
120149b5e25fSSatish Balay       }
120249b5e25fSSatish Balay 
120349b5e25fSSatish Balay       /*-- row sum --*/
120449b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
120549b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
120649b5e25fSSatish Balay         for (j=0; j<bs; j++){
120749b5e25fSSatish Balay           v = a->a + i*bs2 + j;
120849b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
120949b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v);
121049b5e25fSSatish Balay             v   += bs;
121149b5e25fSSatish Balay           }
121249b5e25fSSatish Balay         }
121349b5e25fSSatish Balay       }
121449b5e25fSSatish Balay       /* add k_th block row to il, jl */
1215831a3094SHong Zhang       col = aj+jmin;
1216831a3094SHong Zhang       if (*col == k) jmin++;
121749b5e25fSSatish Balay       if (jmin < jmax){
121849b5e25fSSatish Balay         il[k] = jmin;
121949b5e25fSSatish Balay         j   = a->j[jmin];
122049b5e25fSSatish Balay         jl[k] = jl[j]; jl[j] = k;
122149b5e25fSSatish Balay       }
122249b5e25fSSatish Balay       for (j=0; j<bs; j++){
122349b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
122449b5e25fSSatish Balay       }
122549b5e25fSSatish Balay     }
122649b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
122749b5e25fSSatish Balay     ierr = PetscFree(jl);CHKERRQ(ierr);
122849b5e25fSSatish Balay     ierr = PetscFree(sum);CHKERRQ(ierr);
122949b5e25fSSatish Balay   } else {
1230347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
123149b5e25fSSatish Balay   }
123249b5e25fSSatish Balay   PetscFunctionReturn(0);
123349b5e25fSSatish Balay }
123449b5e25fSSatish Balay 
123549b5e25fSSatish Balay #undef __FUNC__
123649b5e25fSSatish Balay #define __FUNC__ "MatEqual_SeqSBAIJ"
123749b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
123849b5e25fSSatish Balay {
123949b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
124049b5e25fSSatish Balay   int          ierr;
1241ef511fbeSHong Zhang   PetscTruth   flag;
124249b5e25fSSatish Balay 
124349b5e25fSSatish Balay   PetscFunctionBegin;
1244ef511fbeSHong Zhang   ierr = PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&flag);CHKERRQ(ierr);
1245ef511fbeSHong Zhang   if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
124649b5e25fSSatish Balay 
124749b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1248ef511fbeSHong Zhang   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) {
1249ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1250ef511fbeSHong Zhang     PetscFunctionReturn(0);
125149b5e25fSSatish Balay   }
125249b5e25fSSatish Balay 
125349b5e25fSSatish Balay   /* if the a->i are the same */
125449b5e25fSSatish Balay   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
125549b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
125649b5e25fSSatish Balay     PetscFunctionReturn(0);
125749b5e25fSSatish Balay   }
125849b5e25fSSatish Balay 
125949b5e25fSSatish Balay   /* if a->j are the same */
126049b5e25fSSatish Balay   ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr);
126149b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
126249b5e25fSSatish Balay     PetscFunctionReturn(0);
126349b5e25fSSatish Balay   }
126449b5e25fSSatish Balay   /* if a->a are the same */
126549b5e25fSSatish Balay   ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr);
126649b5e25fSSatish Balay   PetscFunctionReturn(0);
126749b5e25fSSatish Balay 
126849b5e25fSSatish Balay }
126949b5e25fSSatish Balay 
127049b5e25fSSatish Balay #undef __FUNC__
127149b5e25fSSatish Balay #define __FUNC__ "MatGetDiagonal_SeqSBAIJ"
127249b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
127349b5e25fSSatish Balay {
127449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
127549b5e25fSSatish Balay   int         ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
127649b5e25fSSatish Balay   Scalar      *x,zero = 0.0;
127749b5e25fSSatish Balay   MatScalar   *aa,*aa_j;
127849b5e25fSSatish Balay 
127949b5e25fSSatish Balay   PetscFunctionBegin;
1280347d480fSBarry Smith   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
128149b5e25fSSatish Balay   bs   = a->bs;
128249b5e25fSSatish Balay   aa   = a->a;
128349b5e25fSSatish Balay   ai   = a->i;
128449b5e25fSSatish Balay   aj   = a->j;
128549b5e25fSSatish Balay   ambs = a->mbs;
128649b5e25fSSatish Balay   bs2  = a->bs2;
128749b5e25fSSatish Balay 
128849b5e25fSSatish Balay   ierr = VecSet(&zero,v);CHKERRQ(ierr);
128949b5e25fSSatish Balay   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
129049b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1291ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
129249b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
129349b5e25fSSatish Balay     j=ai[i];
129449b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
129549b5e25fSSatish Balay       row  = i*bs;
129649b5e25fSSatish Balay       aa_j = aa + j*bs2;
129749b5e25fSSatish Balay       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
129849b5e25fSSatish Balay     }
129949b5e25fSSatish Balay   }
130049b5e25fSSatish Balay   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
130149b5e25fSSatish Balay   PetscFunctionReturn(0);
130249b5e25fSSatish Balay }
130349b5e25fSSatish Balay 
130449b5e25fSSatish Balay #undef __FUNC__
130549b5e25fSSatish Balay #define __FUNC__ "MatDiagonalScale_SeqSBAIJ"
130649b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
130749b5e25fSSatish Balay {
130849b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
130949b5e25fSSatish Balay   Scalar      *l,*r,x,*li,*ri;
131049b5e25fSSatish Balay   MatScalar   *aa,*v;
1311066653e3SSatish Balay   int         ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;
131249b5e25fSSatish Balay 
131349b5e25fSSatish Balay   PetscFunctionBegin;
131449b5e25fSSatish Balay   ai  = a->i;
131549b5e25fSSatish Balay   aj  = a->j;
131649b5e25fSSatish Balay   aa  = a->a;
1317ef511fbeSHong Zhang   m   = A->m;
131849b5e25fSSatish Balay   bs  = a->bs;
131949b5e25fSSatish Balay   mbs = a->mbs;
132049b5e25fSSatish Balay   bs2 = a->bs2;
132149b5e25fSSatish Balay 
132249b5e25fSSatish Balay   if (ll != rr) {
1323347d480fSBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
132449b5e25fSSatish Balay   }
132549b5e25fSSatish Balay   if (ll) {
132649b5e25fSSatish Balay     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
132749b5e25fSSatish Balay     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1328347d480fSBarry Smith     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
132949b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
133049b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
133149b5e25fSSatish Balay       li = l + i*bs;
133249b5e25fSSatish Balay       v  = aa + bs2*ai[i];
133349b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
133449b5e25fSSatish Balay         for (k=0; k<bs2; k++) {
133549b5e25fSSatish Balay           (*v++) *= li[k%bs];
133649b5e25fSSatish Balay         }
133749b5e25fSSatish Balay #ifdef CONT
133849b5e25fSSatish Balay         /* will be used to replace the above loop */
133949b5e25fSSatish Balay         ri = l + bs*aj[ai[i]+j];
134049b5e25fSSatish Balay         for (k=0; k<bs; k++) { /* column value */
134149b5e25fSSatish Balay           x = ri[k];
134249b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
134349b5e25fSSatish Balay         }
134449b5e25fSSatish Balay #endif
134549b5e25fSSatish Balay 
134649b5e25fSSatish Balay       }
134749b5e25fSSatish Balay     }
134849b5e25fSSatish Balay     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1349*b0a32e0cSBarry Smith     PetscLogFlops(2*a->s_nz);
135049b5e25fSSatish Balay   }
135149b5e25fSSatish Balay   /* will be deleted */
135249b5e25fSSatish Balay   if (rr) {
135349b5e25fSSatish Balay     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
135449b5e25fSSatish Balay     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1355347d480fSBarry Smith     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
135649b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
135749b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
135849b5e25fSSatish Balay       v  = aa + bs2*ai[i];
135949b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
136049b5e25fSSatish Balay         ri = r + bs*aj[ai[i]+j];
136149b5e25fSSatish Balay         for (k=0; k<bs; k++) {
136249b5e25fSSatish Balay           x = ri[k];
136349b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
136449b5e25fSSatish Balay         }
136549b5e25fSSatish Balay       }
136649b5e25fSSatish Balay     }
136749b5e25fSSatish Balay     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1368*b0a32e0cSBarry Smith     PetscLogFlops(a->s_nz);
136949b5e25fSSatish Balay   }
137049b5e25fSSatish Balay   PetscFunctionReturn(0);
137149b5e25fSSatish Balay }
137249b5e25fSSatish Balay 
137349b5e25fSSatish Balay #undef __FUNC__
137449b5e25fSSatish Balay #define __FUNC__ "MatGetInfo_SeqSBAIJ"
137549b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
137649b5e25fSSatish Balay {
137749b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137849b5e25fSSatish Balay 
137949b5e25fSSatish Balay   PetscFunctionBegin;
1380ef511fbeSHong Zhang   info->rows_global    = (double)A->m;
1381ef511fbeSHong Zhang   info->columns_global = (double)A->m;
1382ef511fbeSHong Zhang   info->rows_local     = (double)A->m;
1383ef511fbeSHong Zhang   info->columns_local  = (double)A->m;
138449b5e25fSSatish Balay   info->block_size     = a->bs2;
138549b5e25fSSatish Balay   info->nz_allocated   = a->s_maxnz; /*num. of nonzeros in upper triangular part */
138649b5e25fSSatish Balay   info->nz_used        = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */
138749b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
138849b5e25fSSatish Balay   info->assemblies   = A->num_ass;
138949b5e25fSSatish Balay   info->mallocs      = a->reallocs;
139049b5e25fSSatish Balay   info->memory       = A->mem;
139149b5e25fSSatish Balay   if (A->factor) {
139249b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
139349b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
139449b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
139549b5e25fSSatish Balay   } else {
139649b5e25fSSatish Balay     info->fill_ratio_given  = 0;
139749b5e25fSSatish Balay     info->fill_ratio_needed = 0;
139849b5e25fSSatish Balay     info->factor_mallocs    = 0;
139949b5e25fSSatish Balay   }
140049b5e25fSSatish Balay   PetscFunctionReturn(0);
140149b5e25fSSatish Balay }
140249b5e25fSSatish Balay 
140349b5e25fSSatish Balay 
140449b5e25fSSatish Balay #undef __FUNC__
140549b5e25fSSatish Balay #define __FUNC__ "MatZeroEntries_SeqSBAIJ"
140649b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A)
140749b5e25fSSatish Balay {
140849b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
140949b5e25fSSatish Balay   int         ierr;
141049b5e25fSSatish Balay 
141149b5e25fSSatish Balay   PetscFunctionBegin;
141249b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
141349b5e25fSSatish Balay   PetscFunctionReturn(0);
141449b5e25fSSatish Balay }
1415dc354874SHong Zhang 
1416dc354874SHong Zhang #undef __FUNC__
1417dc354874SHong Zhang #define __FUNC__ "MatGetRowMax_SeqSBAIJ"
1418dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1419dc354874SHong Zhang {
1420dc354874SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1421d0f6400bSHong Zhang   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1422c3fca9a7SHong Zhang   PetscReal    atmp;
1423273d9f13SBarry Smith   MatScalar    *aa;
1424273d9f13SBarry Smith   Scalar       zero = 0.0,*x;
1425d0f6400bSHong Zhang   int          ncols,brow,bcol,krow,kcol;
1426dc354874SHong Zhang 
1427dc354874SHong Zhang   PetscFunctionBegin;
1428dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1429dc354874SHong Zhang   bs   = a->bs;
1430dc354874SHong Zhang   aa   = a->a;
1431dc354874SHong Zhang   ai   = a->i;
1432dc354874SHong Zhang   aj   = a->j;
143344117c81SHong Zhang   mbs = a->mbs;
1434dc354874SHong Zhang 
1435dc354874SHong Zhang   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1436dc354874SHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1437dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1438ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
143944117c81SHong Zhang   for (i=0; i<mbs; i++) {
1440d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1441d0f6400bSHong Zhang     brow  = bs*i;
144244117c81SHong Zhang     for (j=0; j<ncols; j++){
1443d0f6400bSHong Zhang       bcol = bs*(*aj);
144444117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1445d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
144644117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1447d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1448d0f6400bSHong Zhang           row = brow + krow;    /* row index */
144944117c81SHong Zhang           /* printf("val[%d,%d]: %g\n",row,col,atmp); */
1450c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1451c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
145244117c81SHong Zhang         }
145344117c81SHong Zhang       }
1454d0f6400bSHong Zhang       aj++;
1455dc354874SHong Zhang     }
1456dc354874SHong Zhang   }
1457dc354874SHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1458dc354874SHong Zhang   PetscFunctionReturn(0);
1459dc354874SHong Zhang }
1460