xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 5eee224d2838b018c45d75ab7aa92e5d8b61a5ea)
173f4d377SMatthew Knepley /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
449b5e25fSSatish Balay #include "src/inline/spops.h"
549b5e25fSSatish Balay #include "src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
9*5eee224dSHong Zhang /* MatIncreaseOverlap_SeqSBAIJ() is vertually same as MatIncreaseOverlap_SeqBAIJ()
10*5eee224dSHong Zhang    except in the line 1: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
11*5eee224dSHong Zhang    and is slightly polished.
12*5eee224dSHong Zhang    Should the two function be combined into a single piece of code? */
134a2ae208SSatish Balay #undef __FUNCT__
144a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
15268466fbSBarry Smith int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov)
1649b5e25fSSatish Balay {
17*5eee224dSHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
18*5eee224dSHong Zhang   int          brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol;
19*5eee224dSHong Zhang   int          start,end,*ai,*aj,bs,*nidx2;
20*5eee224dSHong Zhang   PetscBT      table;
21*5eee224dSHong Zhang 
2249b5e25fSSatish Balay   PetscFunctionBegin;
23*5eee224dSHong Zhang   mbs = a->mbs;
24*5eee224dSHong Zhang   ai  = a->i;
25*5eee224dSHong Zhang   aj  = a->j;
26*5eee224dSHong Zhang   bs  = a->bs;
27*5eee224dSHong Zhang 
28*5eee224dSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
29*5eee224dSHong Zhang 
30*5eee224dSHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
31*5eee224dSHong Zhang   ierr = PetscMalloc((mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
32*5eee224dSHong Zhang   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
33*5eee224dSHong Zhang 
34*5eee224dSHong Zhang   for (i=0; i<is_max; i++) {
35*5eee224dSHong Zhang     /* Initialise the two local arrays */
36*5eee224dSHong Zhang     isz  = 0;
37*5eee224dSHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
38*5eee224dSHong Zhang 
39*5eee224dSHong Zhang     /* Extract the indices, assume there can be duplicate entries */
40*5eee224dSHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
41*5eee224dSHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
42*5eee224dSHong Zhang 
43*5eee224dSHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
44*5eee224dSHong Zhang     for (j=0; j<n ; ++j){
45*5eee224dSHong Zhang       bcol = idx[j]/bs; /* convert the indices into block indices */
46*5eee224dSHong Zhang       if (bcol >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
47*5eee224dSHong Zhang       if(!PetscBTLookupSet(table,bcol)) { nidx[isz++] = bcol;}
48*5eee224dSHong Zhang     }
49*5eee224dSHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
50*5eee224dSHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
51*5eee224dSHong Zhang 
52*5eee224dSHong Zhang     k = 0;
53*5eee224dSHong Zhang     for (j=0; j<ov; j++){ /* for each overlap*/
54*5eee224dSHong Zhang       n = isz;
55*5eee224dSHong Zhang       for (; k<n ; k++){ /* do only those brows in nidx[k], which are not done yet */
56*5eee224dSHong Zhang         brow   = nidx[k];
57*5eee224dSHong Zhang         start = ai[brow];
58*5eee224dSHong Zhang         end   = ai[brow+1];
59*5eee224dSHong Zhang         for (l = start; l<end ; l++){
60*5eee224dSHong Zhang           bcol = aj[l];
61*5eee224dSHong Zhang           if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
62*5eee224dSHong Zhang         }
63*5eee224dSHong Zhang       }
64*5eee224dSHong Zhang     }
65*5eee224dSHong Zhang     /* expand the Index Set */
66*5eee224dSHong Zhang     for (j=0; j<isz; j++) {
67*5eee224dSHong Zhang       for (k=0; k<bs; k++)
68*5eee224dSHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
69*5eee224dSHong Zhang     }
70*5eee224dSHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
71*5eee224dSHong Zhang   }
72*5eee224dSHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
73*5eee224dSHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
74*5eee224dSHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
75*5eee224dSHong Zhang   PetscFunctionReturn(0);
7649b5e25fSSatish Balay }
7749b5e25fSSatish Balay 
784a2ae208SSatish Balay #undef __FUNCT__
794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
8049b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
8149b5e25fSSatish Balay {
8249b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
8349b5e25fSSatish Balay   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
8449b5e25fSSatish Balay   int          row,mat_i,*mat_j,tcol,*mat_ilen;
8549b5e25fSSatish Balay   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
8649b5e25fSSatish Balay   int          *aj = a->j,*ai = a->i;
8749b5e25fSSatish Balay   MatScalar    *mat_a;
8849b5e25fSSatish Balay   Mat          C;
8949b5e25fSSatish Balay   PetscTruth   flag;
9049b5e25fSSatish Balay 
9149b5e25fSSatish Balay   PetscFunctionBegin;
9249b5e25fSSatish Balay 
93ac355199SBarry Smith   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
9449b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
95347d480fSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
9649b5e25fSSatish Balay 
9749b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
9849b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
9949b5e25fSSatish Balay 
100b0a32e0cSBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
10149b5e25fSSatish Balay   ssmap = smap;
102b0a32e0cSBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
10349b5e25fSSatish Balay   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
10449b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
10549b5e25fSSatish Balay   /* determine lens of each row */
10649b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
10749b5e25fSSatish Balay     kstart  = ai[irow[i]];
10849b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
10949b5e25fSSatish Balay     lens[i] = 0;
11049b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
11149b5e25fSSatish Balay         if (ssmap[aj[k]]) {
11249b5e25fSSatish Balay           lens[i]++;
11349b5e25fSSatish Balay         }
11449b5e25fSSatish Balay       }
11549b5e25fSSatish Balay     }
11649b5e25fSSatish Balay   /* Create and fill new matrix */
11749b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
11849b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
11949b5e25fSSatish Balay 
120347d480fSBarry Smith     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
12149b5e25fSSatish Balay     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
12249b5e25fSSatish Balay     if (flag == PETSC_FALSE) {
123347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
12449b5e25fSSatish Balay     }
12549b5e25fSSatish Balay     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
12649b5e25fSSatish Balay     C = *B;
12749b5e25fSSatish Balay   } else {
128e2d9671bSKris Buschelman     ierr = MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr);
129e2d9671bSKris Buschelman     ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
130e2d9671bSKris Buschelman     ierr = MatSeqSBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr);
13149b5e25fSSatish Balay   }
13249b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
13349b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
13449b5e25fSSatish Balay     row    = irow[i];
13549b5e25fSSatish Balay     kstart = ai[row];
13649b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
13749b5e25fSSatish Balay     mat_i  = c->i[i];
13849b5e25fSSatish Balay     mat_j  = c->j + mat_i;
13949b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
14049b5e25fSSatish Balay     mat_ilen = c->ilen + i;
14149b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
14249b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
14349b5e25fSSatish Balay         *mat_j++ = tcol - 1;
14449b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
14549b5e25fSSatish Balay         mat_a   += bs2;
14649b5e25fSSatish Balay         (*mat_ilen)++;
14749b5e25fSSatish Balay       }
14849b5e25fSSatish Balay     }
14949b5e25fSSatish Balay   }
15049b5e25fSSatish Balay 
15149b5e25fSSatish Balay   /* Free work space */
15249b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
15349b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
15449b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15549b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15649b5e25fSSatish Balay 
15749b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
15849b5e25fSSatish Balay   *B = C;
15949b5e25fSSatish Balay   PetscFunctionReturn(0);
16049b5e25fSSatish Balay }
16149b5e25fSSatish Balay 
1624a2ae208SSatish Balay #undef __FUNCT__
1634a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
16449b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
16549b5e25fSSatish Balay {
16649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
16749b5e25fSSatish Balay   IS          is1;
16849b5e25fSSatish Balay   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;
16949b5e25fSSatish Balay 
17049b5e25fSSatish Balay   PetscFunctionBegin;
171ac355199SBarry Smith   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
17249b5e25fSSatish Balay 
17349b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
17449b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
17549b5e25fSSatish Balay 
17649b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
17749b5e25fSSatish Balay    and form the IS with compressed IS */
17882502324SSatish Balay   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
17949b5e25fSSatish Balay   iary = vary + a->mbs;
18049b5e25fSSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
18149b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
18249b5e25fSSatish Balay 
18349b5e25fSSatish Balay   count = 0;
18449b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
185ac355199SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
18649b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
18749b5e25fSSatish Balay   }
18849b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
18949b5e25fSSatish Balay 
19049b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
19149b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
19449b5e25fSSatish Balay   ISDestroy(is1);
19549b5e25fSSatish Balay   PetscFunctionReturn(0);
19649b5e25fSSatish Balay }
19749b5e25fSSatish Balay 
1984a2ae208SSatish Balay #undef __FUNCT__
1994a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
200268466fbSBarry Smith int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
20149b5e25fSSatish Balay {
20249b5e25fSSatish Balay   int ierr,i;
20349b5e25fSSatish Balay 
20449b5e25fSSatish Balay   PetscFunctionBegin;
20549b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
20682502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
20749b5e25fSSatish Balay   }
20849b5e25fSSatish Balay 
20949b5e25fSSatish Balay   for (i=0; i<n; i++) {
21049b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
21149b5e25fSSatish Balay   }
21249b5e25fSSatish Balay   PetscFunctionReturn(0);
21349b5e25fSSatish Balay }
21449b5e25fSSatish Balay 
21549b5e25fSSatish Balay /* -------------------------------------------------------*/
21649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
21749b5e25fSSatish Balay /* -------------------------------------------------------*/
218d9eff348SSatish Balay #include "petscblaslapack.h"
21949b5e25fSSatish Balay 
2204a2ae208SSatish Balay #undef __FUNCT__
2214a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
22249b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
22349b5e25fSSatish Balay {
22449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
22587828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,zero=0.0;
22649b5e25fSSatish Balay   MatScalar       *v;
227831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
22849b5e25fSSatish Balay 
22949b5e25fSSatish Balay   PetscFunctionBegin;
23049b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
231b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
232b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
23349b5e25fSSatish Balay 
23449b5e25fSSatish Balay   v  = a->a;
23549b5e25fSSatish Balay   xb = x;
23649b5e25fSSatish Balay 
23749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
23849b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
23949b5e25fSSatish Balay     x1 = xb[0];
24049b5e25fSSatish Balay     ib = aj + *ai;
241831a3094SHong Zhang     jmin = 0;
242831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
243831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
244831a3094SHong Zhang       jmin++;
245831a3094SHong Zhang     }
246831a3094SHong Zhang     for (j=jmin; j<n; j++) {
24749b5e25fSSatish Balay       cval    = *ib;
24849b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
24949b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
25049b5e25fSSatish Balay     }
25149b5e25fSSatish Balay     xb++; ai++;
25249b5e25fSSatish Balay   }
25349b5e25fSSatish Balay 
254b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
255b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
2566c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m) - A->m);  /* nz = (nz+m)/2 */
25749b5e25fSSatish Balay   PetscFunctionReturn(0);
25849b5e25fSSatish Balay }
25949b5e25fSSatish Balay 
2604a2ae208SSatish Balay #undef __FUNCT__
2614a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
26249b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
26349b5e25fSSatish Balay {
26449b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
26587828ca2SBarry Smith   PetscScalar      *x,*z,*xb,x1,x2,zero=0.0;
26649b5e25fSSatish Balay   MatScalar        *v;
267831a3094SHong Zhang   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
26849b5e25fSSatish Balay 
26949b5e25fSSatish Balay 
27049b5e25fSSatish Balay   PetscFunctionBegin;
27149b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
272b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
273b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
27449b5e25fSSatish Balay 
27549b5e25fSSatish Balay   v     = a->a;
27649b5e25fSSatish Balay   xb = x;
27749b5e25fSSatish Balay 
27849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
27949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
28049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
28149b5e25fSSatish Balay     ib = aj + *ai;
282831a3094SHong Zhang     jmin = 0;
2837fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
28449b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
28549b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
286831a3094SHong Zhang       v += 4; jmin++;
2877fbae186SHong Zhang     }
288831a3094SHong Zhang     for (j=jmin; j<n; j++) {
28949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
29049b5e25fSSatish Balay       cval       = ib[j]*2;
29149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
29249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
29349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
29449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
29549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
29649b5e25fSSatish Balay       v  += 4;
29749b5e25fSSatish Balay     }
29849b5e25fSSatish Balay     xb +=2; ai++;
29949b5e25fSSatish Balay   }
30049b5e25fSSatish Balay 
301b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
302b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
3036c6c5352SBarry Smith   PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
30449b5e25fSSatish Balay   PetscFunctionReturn(0);
30549b5e25fSSatish Balay }
30649b5e25fSSatish Balay 
3074a2ae208SSatish Balay #undef __FUNCT__
3084a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
30949b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
31049b5e25fSSatish Balay {
31149b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
31287828ca2SBarry Smith   PetscScalar   *x,*z,*xb,x1,x2,x3,zero=0.0;
31349b5e25fSSatish Balay   MatScalar     *v;
314831a3094SHong Zhang   int           mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
31549b5e25fSSatish Balay 
31649b5e25fSSatish Balay 
31749b5e25fSSatish Balay   PetscFunctionBegin;
31849b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
319b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
320b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
32149b5e25fSSatish Balay 
32249b5e25fSSatish Balay   v     = a->a;
32349b5e25fSSatish Balay   xb = x;
32449b5e25fSSatish Balay 
32549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
32649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
32749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
32849b5e25fSSatish Balay     ib = aj + *ai;
329831a3094SHong Zhang     jmin = 0;
3307fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
33149b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
33249b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
33349b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
334831a3094SHong Zhang       v += 9; jmin++;
3357fbae186SHong Zhang     }
336831a3094SHong Zhang     for (j=jmin; j<n; j++) {
33749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
33849b5e25fSSatish Balay       cval       = ib[j]*3;
33949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
34049b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
34149b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
34249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
34349b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
34449b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
34549b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
34649b5e25fSSatish Balay       v  += 9;
34749b5e25fSSatish Balay     }
34849b5e25fSSatish Balay     xb +=3; ai++;
34949b5e25fSSatish Balay   }
35049b5e25fSSatish Balay 
351b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
352b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
3536c6c5352SBarry Smith   PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
35449b5e25fSSatish Balay   PetscFunctionReturn(0);
35549b5e25fSSatish Balay }
35649b5e25fSSatish Balay 
3574a2ae208SSatish Balay #undef __FUNCT__
3584a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
35949b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
36049b5e25fSSatish Balay {
36149b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
36287828ca2SBarry Smith   PetscScalar      *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
36349b5e25fSSatish Balay   MatScalar        *v;
364831a3094SHong Zhang   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
36549b5e25fSSatish Balay 
36649b5e25fSSatish Balay   PetscFunctionBegin;
36749b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
368b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
369b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
37049b5e25fSSatish Balay 
37149b5e25fSSatish Balay   v     = a->a;
37249b5e25fSSatish Balay   xb = x;
37349b5e25fSSatish Balay 
37449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
37549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
37649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
37749b5e25fSSatish Balay     ib = aj + *ai;
378831a3094SHong Zhang     jmin = 0;
3797fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
38049b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
38149b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
38249b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
38349b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
384831a3094SHong Zhang       v += 16; jmin++;
3857fbae186SHong Zhang     }
386831a3094SHong Zhang     for (j=jmin; j<n; j++) {
38749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
38849b5e25fSSatish Balay       cval       = ib[j]*4;
38949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
39049b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
39149b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
39249b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
39349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
39449b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
39549b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
39649b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
39749b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
39849b5e25fSSatish Balay       v  += 16;
39949b5e25fSSatish Balay     }
40049b5e25fSSatish Balay     xb +=4; ai++;
40149b5e25fSSatish Balay   }
40249b5e25fSSatish Balay 
403b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
404b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
4056c6c5352SBarry Smith   PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
40649b5e25fSSatish Balay   PetscFunctionReturn(0);
40749b5e25fSSatish Balay }
40849b5e25fSSatish Balay 
4094a2ae208SSatish Balay #undef __FUNCT__
4104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
41149b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
41249b5e25fSSatish Balay {
41349b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
41487828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
41549b5e25fSSatish Balay   MatScalar       *v;
416831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
41749b5e25fSSatish Balay 
41849b5e25fSSatish Balay   PetscFunctionBegin;
41949b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
420b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
421b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
42249b5e25fSSatish Balay 
42349b5e25fSSatish Balay   v     = a->a;
42449b5e25fSSatish Balay   xb = x;
42549b5e25fSSatish Balay 
42649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
42749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
42849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
42949b5e25fSSatish Balay     ib = aj + *ai;
430831a3094SHong Zhang     jmin = 0;
4317fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
43249b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
43349b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
43449b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
43549b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
43649b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
437831a3094SHong Zhang       v += 25; jmin++;
4387fbae186SHong Zhang     }
439831a3094SHong Zhang     for (j=jmin; j<n; j++) {
44049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
44149b5e25fSSatish Balay       cval       = ib[j]*5;
44249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
44349b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
44449b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
44549b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
44649b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
44749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
44849b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
44949b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
45049b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
45149b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
45249b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
45349b5e25fSSatish Balay       v  += 25;
45449b5e25fSSatish Balay     }
45549b5e25fSSatish Balay     xb +=5; ai++;
45649b5e25fSSatish Balay   }
45749b5e25fSSatish Balay 
458b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
459b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
4606c6c5352SBarry Smith   PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
46149b5e25fSSatish Balay   PetscFunctionReturn(0);
46249b5e25fSSatish Balay }
46349b5e25fSSatish Balay 
46449b5e25fSSatish Balay 
4654a2ae208SSatish Balay #undef __FUNCT__
4664a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
46749b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
46849b5e25fSSatish Balay {
46949b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
47087828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
47149b5e25fSSatish Balay   MatScalar       *v;
472831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
47349b5e25fSSatish Balay 
47449b5e25fSSatish Balay   PetscFunctionBegin;
47549b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
476b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
477b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
47849b5e25fSSatish Balay 
47949b5e25fSSatish Balay   v     = a->a;
48049b5e25fSSatish Balay   xb = x;
48149b5e25fSSatish Balay 
48249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
48349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
48449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
48549b5e25fSSatish Balay     ib = aj + *ai;
486831a3094SHong Zhang     jmin = 0;
4877fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
48849b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
48949b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
49049b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
49149b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
49249b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
49349b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
494831a3094SHong Zhang       v += 36; jmin++;
4957fbae186SHong Zhang     }
496831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49849b5e25fSSatish Balay       cval       = ib[j]*6;
49949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
50049b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
50149b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
50249b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
50349b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
50449b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
50549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50649b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
50749b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
50849b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
50949b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
51049b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
51149b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
51249b5e25fSSatish Balay       v  += 36;
51349b5e25fSSatish Balay     }
51449b5e25fSSatish Balay     xb +=6; ai++;
51549b5e25fSSatish Balay   }
51649b5e25fSSatish Balay 
517b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
518b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
5196c6c5352SBarry Smith   PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
52049b5e25fSSatish Balay   PetscFunctionReturn(0);
52149b5e25fSSatish Balay }
5224a2ae208SSatish Balay #undef __FUNCT__
5234a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
52449b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
52549b5e25fSSatish Balay {
52649b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
52787828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
52849b5e25fSSatish Balay   MatScalar       *v;
529831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
53049b5e25fSSatish Balay 
53149b5e25fSSatish Balay   PetscFunctionBegin;
53249b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
533b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
534b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
53549b5e25fSSatish Balay 
53649b5e25fSSatish Balay   v     = a->a;
53749b5e25fSSatish Balay   xb = x;
53849b5e25fSSatish Balay 
53949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
54049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
54149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
54249b5e25fSSatish Balay     ib = aj + *ai;
543831a3094SHong Zhang     jmin = 0;
5447fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
54549b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
54649b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
54749b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
54849b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
54949b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
55049b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
55149b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
552831a3094SHong Zhang       v += 49; jmin++;
5537fbae186SHong Zhang     }
554831a3094SHong Zhang     for (j=jmin; j<n; j++) {
55549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
55649b5e25fSSatish Balay       cval       = ib[j]*7;
55749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
55849b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
55949b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
56049b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
56149b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
56249b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
56349b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
56449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
56549b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
56649b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
56749b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
56849b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
56949b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
57049b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
57149b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
57249b5e25fSSatish Balay       v  += 49;
57349b5e25fSSatish Balay     }
57449b5e25fSSatish Balay     xb +=7; ai++;
57549b5e25fSSatish Balay   }
576b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
577b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
5786c6c5352SBarry Smith   PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
57949b5e25fSSatish Balay   PetscFunctionReturn(0);
58049b5e25fSSatish Balay }
58149b5e25fSSatish Balay 
58249b5e25fSSatish Balay /*
58349b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
58449b5e25fSSatish Balay */
5854a2ae208SSatish Balay #undef __FUNCT__
5864a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
58749b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
58849b5e25fSSatish Balay {
58949b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
59087828ca2SBarry Smith   PetscScalar     *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5910b60a74dSHong Zhang   MatScalar       *v;
592066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
5930b60a74dSHong Zhang   int             ncols,k;
59449b5e25fSSatish Balay 
59549b5e25fSSatish Balay   PetscFunctionBegin;
59649b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
597b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x;
598b1d4fb26SBarry Smith   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z;
59949b5e25fSSatish Balay 
60049b5e25fSSatish Balay   aj   = a->j;
60149b5e25fSSatish Balay   v    = a->a;
60249b5e25fSSatish Balay   ii   = a->i;
60349b5e25fSSatish Balay 
60449b5e25fSSatish Balay   if (!a->mult_work) {
60587828ca2SBarry Smith     ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
60649b5e25fSSatish Balay   }
60749b5e25fSSatish Balay   work = a->mult_work;
60849b5e25fSSatish Balay 
60949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61049b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
61149b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
61249b5e25fSSatish Balay 
61349b5e25fSSatish Balay     /* upper triangular part */
61449b5e25fSSatish Balay     for (j=0; j<n; j++) {
61549b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
61649b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
61749b5e25fSSatish Balay       workt += bs;
61849b5e25fSSatish Balay     }
61949b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
62049b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
62149b5e25fSSatish Balay 
62249b5e25fSSatish Balay     /* strict lower triangular part */
623831a3094SHong Zhang     idx = aj+ii[0];
624831a3094SHong Zhang     if (*idx == i){
62596b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
626831a3094SHong Zhang     }
62796b9376eSHong Zhang 
62849b5e25fSSatish Balay     if (ncols > 0){
62949b5e25fSSatish Balay       workt = work;
63087828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
631831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
632831a3094SHong Zhang       for (j=0; j<n; j++) {
633831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
63449b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
63549b5e25fSSatish Balay         workt += bs;
63649b5e25fSSatish Balay       }
63749b5e25fSSatish Balay     }
63849b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
63949b5e25fSSatish Balay   }
64049b5e25fSSatish Balay 
641b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
642b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
6436c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
64449b5e25fSSatish Balay   PetscFunctionReturn(0);
64549b5e25fSSatish Balay }
64649b5e25fSSatish Balay 
6474a2ae208SSatish Balay #undef __FUNCT__
6484a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
64949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
65049b5e25fSSatish Balay {
65149b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
65287828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1;
65349b5e25fSSatish Balay   MatScalar       *v;
654831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
65549b5e25fSSatish Balay 
65649b5e25fSSatish Balay   PetscFunctionBegin;
657b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
65849b5e25fSSatish Balay   if (yy != xx) {
659b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
66049b5e25fSSatish Balay   } else {
66149b5e25fSSatish Balay     y = x;
66249b5e25fSSatish Balay   }
66349b5e25fSSatish Balay   if (zz != yy) {
664a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
665b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
666a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
66749b5e25fSSatish Balay   } else {
66849b5e25fSSatish Balay     z = y;
66949b5e25fSSatish Balay   }
67049b5e25fSSatish Balay 
67149b5e25fSSatish Balay   v  = a->a;
67249b5e25fSSatish Balay   xb = x;
67349b5e25fSSatish Balay 
67449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
67549b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
67649b5e25fSSatish Balay     x1 = xb[0];
67749b5e25fSSatish Balay     ib = aj + *ai;
678831a3094SHong Zhang     jmin = 0;
679831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
680831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
681831a3094SHong Zhang     }
682831a3094SHong Zhang     for (j=jmin; j<n; j++) {
68349b5e25fSSatish Balay       cval    = *ib;
68449b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
68549b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
68649b5e25fSSatish Balay     }
68749b5e25fSSatish Balay     xb++; ai++;
68849b5e25fSSatish Balay   }
68949b5e25fSSatish Balay 
690b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
691b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
692b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
69349b5e25fSSatish Balay 
6946c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m));
69549b5e25fSSatish Balay   PetscFunctionReturn(0);
69649b5e25fSSatish Balay }
69749b5e25fSSatish Balay 
6984a2ae208SSatish Balay #undef __FUNCT__
6994a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
70049b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
70149b5e25fSSatish Balay {
70249b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
70387828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2;
70449b5e25fSSatish Balay   MatScalar       *v;
705831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
70649b5e25fSSatish Balay 
70749b5e25fSSatish Balay   PetscFunctionBegin;
708b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
70949b5e25fSSatish Balay   if (yy != xx) {
710b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
71149b5e25fSSatish Balay   } else {
71249b5e25fSSatish Balay     y = x;
71349b5e25fSSatish Balay   }
71449b5e25fSSatish Balay   if (zz != yy) {
715a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
716b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
717a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
71849b5e25fSSatish Balay   } else {
71949b5e25fSSatish Balay     z = y;
72049b5e25fSSatish Balay   }
72149b5e25fSSatish Balay 
72249b5e25fSSatish Balay   v     = a->a;
72349b5e25fSSatish Balay   xb = x;
72449b5e25fSSatish Balay 
72549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
72649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
72749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
72849b5e25fSSatish Balay     ib = aj + *ai;
729831a3094SHong Zhang     jmin = 0;
7307fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
73149b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
73249b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
733831a3094SHong Zhang       v += 4; jmin++;
7347fbae186SHong Zhang     }
735831a3094SHong Zhang     for (j=jmin; j<n; j++) {
73649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
73749b5e25fSSatish Balay       cval       = ib[j]*2;
73849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
73949b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
74049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
74149b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
74249b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
74349b5e25fSSatish Balay       v  += 4;
74449b5e25fSSatish Balay     }
74549b5e25fSSatish Balay     xb +=2; ai++;
74649b5e25fSSatish Balay   }
74749b5e25fSSatish Balay 
748b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
749b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
750b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
75149b5e25fSSatish Balay 
7526c6c5352SBarry Smith   PetscLogFlops(4*(a->nz*2 - A->m));
75349b5e25fSSatish Balay   PetscFunctionReturn(0);
75449b5e25fSSatish Balay }
75549b5e25fSSatish Balay 
7564a2ae208SSatish Balay #undef __FUNCT__
7574a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
75849b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
75949b5e25fSSatish Balay {
76049b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
76187828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3;
76249b5e25fSSatish Balay   MatScalar       *v;
763831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
76449b5e25fSSatish Balay 
76549b5e25fSSatish Balay   PetscFunctionBegin;
766b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
76749b5e25fSSatish Balay   if (yy != xx) {
768b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
76949b5e25fSSatish Balay   } else {
77049b5e25fSSatish Balay     y = x;
77149b5e25fSSatish Balay   }
77249b5e25fSSatish Balay   if (zz != yy) {
773a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
774b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
775a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
77649b5e25fSSatish Balay   } else {
77749b5e25fSSatish Balay     z = y;
77849b5e25fSSatish Balay   }
77949b5e25fSSatish Balay 
78049b5e25fSSatish Balay   v     = a->a;
78149b5e25fSSatish Balay   xb = x;
78249b5e25fSSatish Balay 
78349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
78449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
78549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
78649b5e25fSSatish Balay     ib = aj + *ai;
787831a3094SHong Zhang     jmin = 0;
7887fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
78949b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
79049b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
79149b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
792831a3094SHong Zhang      v += 9; jmin++;
7937fbae186SHong Zhang     }
794831a3094SHong Zhang     for (j=jmin; j<n; j++) {
79549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
79649b5e25fSSatish Balay       cval       = ib[j]*3;
79749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
79849b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
79949b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
80049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
80149b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
80249b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
80349b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
80449b5e25fSSatish Balay       v  += 9;
80549b5e25fSSatish Balay     }
80649b5e25fSSatish Balay     xb +=3; ai++;
80749b5e25fSSatish Balay   }
80849b5e25fSSatish Balay 
809b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
810b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
811b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
81249b5e25fSSatish Balay 
8136c6c5352SBarry Smith   PetscLogFlops(18*(a->nz*2 - A->m));
81449b5e25fSSatish Balay   PetscFunctionReturn(0);
81549b5e25fSSatish Balay }
81649b5e25fSSatish Balay 
8174a2ae208SSatish Balay #undef __FUNCT__
8184a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
81949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
82049b5e25fSSatish Balay {
82149b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
82287828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4;
82349b5e25fSSatish Balay   MatScalar       *v;
824831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
82549b5e25fSSatish Balay 
82649b5e25fSSatish Balay   PetscFunctionBegin;
827b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
82849b5e25fSSatish Balay   if (yy != xx) {
829b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
83049b5e25fSSatish Balay   } else {
83149b5e25fSSatish Balay     y = x;
83249b5e25fSSatish Balay   }
83349b5e25fSSatish Balay   if (zz != yy) {
834a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
835b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
836a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
83749b5e25fSSatish Balay   } else {
83849b5e25fSSatish Balay     z = y;
83949b5e25fSSatish Balay   }
84049b5e25fSSatish Balay 
84149b5e25fSSatish Balay   v     = a->a;
84249b5e25fSSatish Balay   xb = x;
84349b5e25fSSatish Balay 
84449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
84549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
84649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
84749b5e25fSSatish Balay     ib = aj + *ai;
848831a3094SHong Zhang     jmin = 0;
8497fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
85049b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
85149b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
85249b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
85349b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
854831a3094SHong Zhang       v += 16; jmin++;
8557fbae186SHong Zhang     }
856831a3094SHong Zhang     for (j=jmin; j<n; j++) {
85749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
85849b5e25fSSatish Balay       cval       = ib[j]*4;
85949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
86049b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
86149b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
86249b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
86349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
86449b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
86549b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
86649b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
86749b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
86849b5e25fSSatish Balay       v  += 16;
86949b5e25fSSatish Balay     }
87049b5e25fSSatish Balay     xb +=4; ai++;
87149b5e25fSSatish Balay   }
87249b5e25fSSatish Balay 
873b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
874b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
875b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
87649b5e25fSSatish Balay 
8776c6c5352SBarry Smith   PetscLogFlops(32*(a->nz*2 - A->m));
87849b5e25fSSatish Balay   PetscFunctionReturn(0);
87949b5e25fSSatish Balay }
88049b5e25fSSatish Balay 
8814a2ae208SSatish Balay #undef __FUNCT__
8824a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
88349b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
88449b5e25fSSatish Balay {
88549b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
88687828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5;
88749b5e25fSSatish Balay   MatScalar       *v;
888831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
88949b5e25fSSatish Balay 
89049b5e25fSSatish Balay   PetscFunctionBegin;
891b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
89249b5e25fSSatish Balay   if (yy != xx) {
893b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
89449b5e25fSSatish Balay   } else {
89549b5e25fSSatish Balay     y = x;
89649b5e25fSSatish Balay   }
89749b5e25fSSatish Balay   if (zz != yy) {
898a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
899b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
900a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
90149b5e25fSSatish Balay   } else {
90249b5e25fSSatish Balay     z = y;
90349b5e25fSSatish Balay   }
90449b5e25fSSatish Balay 
90549b5e25fSSatish Balay   v     = a->a;
90649b5e25fSSatish Balay   xb = x;
90749b5e25fSSatish Balay 
90849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
90949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
91049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
91149b5e25fSSatish Balay     ib = aj + *ai;
912831a3094SHong Zhang     jmin = 0;
9137fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
91449b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
91549b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
91649b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
91749b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
91849b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
919831a3094SHong Zhang       v += 25; jmin++;
9207fbae186SHong Zhang     }
921831a3094SHong Zhang     for (j=jmin; j<n; j++) {
92249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
92349b5e25fSSatish Balay       cval       = ib[j]*5;
92449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
92549b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
92649b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
92749b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
92849b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
92949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
93049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
93149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
93249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
93349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
93449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
93549b5e25fSSatish Balay       v  += 25;
93649b5e25fSSatish Balay     }
93749b5e25fSSatish Balay     xb +=5; ai++;
93849b5e25fSSatish Balay   }
93949b5e25fSSatish Balay 
940b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
941b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
942b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
94349b5e25fSSatish Balay 
9446c6c5352SBarry Smith   PetscLogFlops(50*(a->nz*2 - A->m));
94549b5e25fSSatish Balay   PetscFunctionReturn(0);
94649b5e25fSSatish Balay }
9474a2ae208SSatish Balay #undef __FUNCT__
9484a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
94949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
95049b5e25fSSatish Balay {
95149b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
95287828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
95349b5e25fSSatish Balay   MatScalar       *v;
954831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
95549b5e25fSSatish Balay 
95649b5e25fSSatish Balay   PetscFunctionBegin;
957b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
95849b5e25fSSatish Balay   if (yy != xx) {
959b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
96049b5e25fSSatish Balay   } else {
96149b5e25fSSatish Balay     y = x;
96249b5e25fSSatish Balay   }
96349b5e25fSSatish Balay   if (zz != yy) {
964a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
965b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
966a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
96749b5e25fSSatish Balay   } else {
96849b5e25fSSatish Balay     z = y;
96949b5e25fSSatish Balay   }
97049b5e25fSSatish Balay 
97149b5e25fSSatish Balay   v     = a->a;
97249b5e25fSSatish Balay   xb = x;
97349b5e25fSSatish Balay 
97449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
97549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
97649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
97749b5e25fSSatish Balay     ib = aj + *ai;
978831a3094SHong Zhang     jmin = 0;
9797fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
98049b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
98149b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
98249b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
98349b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
98449b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
98549b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
986831a3094SHong Zhang       v += 36; jmin++;
9877fbae186SHong Zhang     }
988831a3094SHong Zhang     for (j=jmin; j<n; j++) {
98949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
99049b5e25fSSatish Balay       cval       = ib[j]*6;
99149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
99249b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
99349b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
99449b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
99549b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
99649b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
99749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
99849b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
99949b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
100049b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
100149b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
100249b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
100349b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
100449b5e25fSSatish Balay       v  += 36;
100549b5e25fSSatish Balay     }
100649b5e25fSSatish Balay     xb +=6; ai++;
100749b5e25fSSatish Balay   }
100849b5e25fSSatish Balay 
1009b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1010b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1011b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
101249b5e25fSSatish Balay 
10136c6c5352SBarry Smith   PetscLogFlops(72*(a->nz*2 - A->m));
101449b5e25fSSatish Balay   PetscFunctionReturn(0);
101549b5e25fSSatish Balay }
101649b5e25fSSatish Balay 
10174a2ae208SSatish Balay #undef __FUNCT__
10184a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
101949b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
102049b5e25fSSatish Balay {
102149b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
102287828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
102349b5e25fSSatish Balay   MatScalar       *v;
1024831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
102549b5e25fSSatish Balay 
102649b5e25fSSatish Balay   PetscFunctionBegin;
1027b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
102849b5e25fSSatish Balay   if (yy != xx) {
1029b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
103049b5e25fSSatish Balay   } else {
103149b5e25fSSatish Balay     y = x;
103249b5e25fSSatish Balay   }
103349b5e25fSSatish Balay   if (zz != yy) {
1034a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1035b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
1036a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
103749b5e25fSSatish Balay   } else {
103849b5e25fSSatish Balay     z = y;
103949b5e25fSSatish Balay   }
104049b5e25fSSatish Balay 
104149b5e25fSSatish Balay   v     = a->a;
104249b5e25fSSatish Balay   xb = x;
104349b5e25fSSatish Balay 
104449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
104549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
104649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
104749b5e25fSSatish Balay     ib = aj + *ai;
1048831a3094SHong Zhang     jmin = 0;
10497fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
105049b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
105149b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
105249b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
105349b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
105449b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
105549b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
105649b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1057831a3094SHong Zhang       v += 49; jmin++;
10587fbae186SHong Zhang     }
1059831a3094SHong Zhang     for (j=jmin; j<n; j++) {
106049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
106149b5e25fSSatish Balay       cval       = ib[j]*7;
106249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
106349b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
106449b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
106549b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
106649b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
106749b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
106849b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
106949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
107049b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
107149b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
107249b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
107349b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
107449b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
107549b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
107649b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
107749b5e25fSSatish Balay       v  += 49;
107849b5e25fSSatish Balay     }
107949b5e25fSSatish Balay     xb +=7; ai++;
108049b5e25fSSatish Balay   }
108149b5e25fSSatish Balay 
1082b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1083b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1084b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
108549b5e25fSSatish Balay 
10866c6c5352SBarry Smith   PetscLogFlops(98*(a->nz*2 - A->m));
108749b5e25fSSatish Balay   PetscFunctionReturn(0);
108849b5e25fSSatish Balay }
108949b5e25fSSatish Balay 
10904a2ae208SSatish Balay #undef __FUNCT__
10914a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
109249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
109349b5e25fSSatish Balay {
109449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
109587828ca2SBarry Smith   PetscScalar     *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1096066653e3SSatish Balay   MatScalar       *v;
1097066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1098066653e3SSatish Balay   int             ncols,k;
109949b5e25fSSatish Balay 
110049b5e25fSSatish Balay   PetscFunctionBegin;
1101b1d4fb26SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); x_ptr=x;
110249b5e25fSSatish Balay   if (yy != xx) {
1103b1d4fb26SBarry Smith     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
110449b5e25fSSatish Balay   } else {
110549b5e25fSSatish Balay     y = x;
110649b5e25fSSatish Balay   }
110749b5e25fSSatish Balay   if (zz != yy) {
1108a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1109b1d4fb26SBarry Smith     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr); z_ptr=z;
1110a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
111149b5e25fSSatish Balay   } else {
111249b5e25fSSatish Balay     z = y;
111349b5e25fSSatish Balay   }
111449b5e25fSSatish Balay 
111549b5e25fSSatish Balay   aj   = a->j;
111649b5e25fSSatish Balay   v    = a->a;
111749b5e25fSSatish Balay   ii   = a->i;
111849b5e25fSSatish Balay 
111949b5e25fSSatish Balay   if (!a->mult_work) {
112087828ca2SBarry Smith     ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
112149b5e25fSSatish Balay   }
112249b5e25fSSatish Balay   work = a->mult_work;
112349b5e25fSSatish Balay 
112449b5e25fSSatish Balay 
112549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
112649b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
112749b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
112849b5e25fSSatish Balay 
112949b5e25fSSatish Balay     /* upper triangular part */
113049b5e25fSSatish Balay     for (j=0; j<n; j++) {
113149b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
113249b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
113349b5e25fSSatish Balay       workt += bs;
113449b5e25fSSatish Balay     }
113549b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
113649b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
113749b5e25fSSatish Balay 
113849b5e25fSSatish Balay     /* strict lower triangular part */
1139831a3094SHong Zhang     idx = aj+ii[0];
1140831a3094SHong Zhang     if (*idx == i){
114196b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1142831a3094SHong Zhang     }
114349b5e25fSSatish Balay     if (ncols > 0){
114449b5e25fSSatish Balay       workt = work;
114587828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1146831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1147831a3094SHong Zhang       for (j=0; j<n; j++) {
1148831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
1149831a3094SHong Zhang         /* idx++; */
115049b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
115149b5e25fSSatish Balay         workt += bs;
115249b5e25fSSatish Balay       }
115349b5e25fSSatish Balay     }
115449b5e25fSSatish Balay 
115549b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
115649b5e25fSSatish Balay   }
115749b5e25fSSatish Balay 
1158b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1159b1d4fb26SBarry Smith   if (yy != xx) ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1160b1d4fb26SBarry Smith   if (zz != yy) ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
116149b5e25fSSatish Balay 
11626c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m));
116349b5e25fSSatish Balay   PetscFunctionReturn(0);
116449b5e25fSSatish Balay }
116549b5e25fSSatish Balay 
11664a2ae208SSatish Balay #undef __FUNCT__
11674a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqSBAIJ"
116849b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
116949b5e25fSSatish Balay {
1170eeeff2ecSHong Zhang   int ierr;
1171eeeff2ecSHong Zhang 
117249b5e25fSSatish Balay   PetscFunctionBegin;
1173eeeff2ecSHong Zhang   ierr = MatMult(A,xx,zz);CHKERRQ(ierr);
1174eeeff2ecSHong Zhang   PetscFunctionReturn(0);
117549b5e25fSSatish Balay }
117649b5e25fSSatish Balay 
11774a2ae208SSatish Balay #undef __FUNCT__
11784a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ"
117949b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
118049b5e25fSSatish Balay 
118149b5e25fSSatish Balay {
1182eeeff2ecSHong Zhang   int ierr;
1183eeeff2ecSHong Zhang 
118449b5e25fSSatish Balay   PetscFunctionBegin;
1185eeeff2ecSHong Zhang   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1186eeeff2ecSHong Zhang   PetscFunctionReturn(0);
118749b5e25fSSatish Balay }
118849b5e25fSSatish Balay 
11894a2ae208SSatish Balay #undef __FUNCT__
11904a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1191268466fbSBarry Smith int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA)
119249b5e25fSSatish Balay {
119349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
11946c6c5352SBarry Smith   int         one = 1,totalnz = a->bs2*a->nz;
119549b5e25fSSatish Balay 
119649b5e25fSSatish Balay   PetscFunctionBegin;
1197268466fbSBarry Smith   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1198b0a32e0cSBarry Smith   PetscLogFlops(totalnz);
119949b5e25fSSatish Balay   PetscFunctionReturn(0);
120049b5e25fSSatish Balay }
120149b5e25fSSatish Balay 
12024a2ae208SSatish Balay #undef __FUNCT__
12034a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
120449b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
120549b5e25fSSatish Balay {
120649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
120749b5e25fSSatish Balay   MatScalar   *v = a->a;
120849b5e25fSSatish Balay   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1209831a3094SHong Zhang   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1210831a3094SHong Zhang   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
121149b5e25fSSatish Balay 
121249b5e25fSSatish Balay   PetscFunctionBegin;
121349b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
121449b5e25fSSatish Balay     for (k=0; k<mbs; k++){
121549b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1216831a3094SHong Zhang       col  = aj + jmin;
1217831a3094SHong Zhang       if (*col == k){         /* diagonal block */
121849b5e25fSSatish Balay         for (i=0; i<bs2; i++){
121949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
122049b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
122149b5e25fSSatish Balay #else
122249b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
122349b5e25fSSatish Balay #endif
122449b5e25fSSatish Balay         }
1225831a3094SHong Zhang         jmin++;
1226831a3094SHong Zhang       }
1227831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
122849b5e25fSSatish Balay         for (i=0; i<bs2; i++){
122949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
123049b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
123149b5e25fSSatish Balay #else
123249b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
123349b5e25fSSatish Balay #endif
123449b5e25fSSatish Balay         }
123549b5e25fSSatish Balay       }
123649b5e25fSSatish Balay     }
123749b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
123849b5e25fSSatish Balay 
123949b5e25fSSatish Balay   }  else if (type == NORM_INFINITY) { /* maximum row sum */
124082502324SSatish Balay     ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr);
124182502324SSatish Balay     ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr);
124282502324SSatish Balay     ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr);
124349b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
124449b5e25fSSatish Balay       jl[i] = mbs; il[0] = 0;
124549b5e25fSSatish Balay     }
124649b5e25fSSatish Balay 
124749b5e25fSSatish Balay     *norm = 0.0;
124849b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
124949b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
125049b5e25fSSatish Balay 
125149b5e25fSSatish Balay       /*-- col sum --*/
125249b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1253831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1254831a3094SHong Zhang                   at step k */
125549b5e25fSSatish Balay       while (i<mbs){
125649b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
125749b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
125849b5e25fSSatish Balay         for (j=0; j<bs; j++){
125949b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
126049b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
126149b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
126249b5e25fSSatish Balay           }
126349b5e25fSSatish Balay         }
126449b5e25fSSatish Balay         /* update il, jl */
1265831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1266831a3094SHong Zhang         jmax = a->i[i+1];
126749b5e25fSSatish Balay         if (jmin < jmax){
126849b5e25fSSatish Balay           il[i] = jmin;
126949b5e25fSSatish Balay           j   = a->j[jmin];
127049b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
127149b5e25fSSatish Balay         }
127249b5e25fSSatish Balay         i = nexti;
127349b5e25fSSatish Balay       }
127449b5e25fSSatish Balay 
127549b5e25fSSatish Balay       /*-- row sum --*/
127649b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
127749b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
127849b5e25fSSatish Balay         for (j=0; j<bs; j++){
127949b5e25fSSatish Balay           v = a->a + i*bs2 + j;
128049b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
128149b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v);
128249b5e25fSSatish Balay             v   += bs;
128349b5e25fSSatish Balay           }
128449b5e25fSSatish Balay         }
128549b5e25fSSatish Balay       }
128649b5e25fSSatish Balay       /* add k_th block row to il, jl */
1287831a3094SHong Zhang       col = aj+jmin;
1288831a3094SHong Zhang       if (*col == k) jmin++;
128949b5e25fSSatish Balay       if (jmin < jmax){
129049b5e25fSSatish Balay         il[k] = jmin;
129149b5e25fSSatish Balay         j   = a->j[jmin];
129249b5e25fSSatish Balay         jl[k] = jl[j]; jl[j] = k;
129349b5e25fSSatish Balay       }
129449b5e25fSSatish Balay       for (j=0; j<bs; j++){
129549b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
129649b5e25fSSatish Balay       }
129749b5e25fSSatish Balay     }
129849b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
129949b5e25fSSatish Balay     ierr = PetscFree(jl);CHKERRQ(ierr);
130049b5e25fSSatish Balay     ierr = PetscFree(sum);CHKERRQ(ierr);
130149b5e25fSSatish Balay   } else {
1302347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
130349b5e25fSSatish Balay   }
130449b5e25fSSatish Balay   PetscFunctionReturn(0);
130549b5e25fSSatish Balay }
130649b5e25fSSatish Balay 
13074a2ae208SSatish Balay #undef __FUNCT__
13084a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
130949b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
131049b5e25fSSatish Balay {
131149b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
131249b5e25fSSatish Balay   int          ierr;
131349b5e25fSSatish Balay 
131449b5e25fSSatish Balay   PetscFunctionBegin;
131549b5e25fSSatish Balay 
131649b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
13176c6c5352SBarry Smith   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1318ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1319ef511fbeSHong Zhang     PetscFunctionReturn(0);
132049b5e25fSSatish Balay   }
132149b5e25fSSatish Balay 
132249b5e25fSSatish Balay   /* if the a->i are the same */
132349b5e25fSSatish Balay   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
132449b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
132549b5e25fSSatish Balay     PetscFunctionReturn(0);
132649b5e25fSSatish Balay   }
132749b5e25fSSatish Balay 
132849b5e25fSSatish Balay   /* if a->j are the same */
13296c6c5352SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
133049b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
133149b5e25fSSatish Balay     PetscFunctionReturn(0);
133249b5e25fSSatish Balay   }
133349b5e25fSSatish Balay   /* if a->a are the same */
13346c6c5352SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
133549b5e25fSSatish Balay 
1336935af2e7SHong Zhang   PetscFunctionReturn(0);
133749b5e25fSSatish Balay }
133849b5e25fSSatish Balay 
13394a2ae208SSatish Balay #undef __FUNCT__
13404a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
134149b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
134249b5e25fSSatish Balay {
134349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
134449b5e25fSSatish Balay   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
134587828ca2SBarry Smith   PetscScalar  *x,zero = 0.0;
134649b5e25fSSatish Balay   MatScalar    *aa,*aa_j;
134749b5e25fSSatish Balay 
134849b5e25fSSatish Balay   PetscFunctionBegin;
134949b5e25fSSatish Balay   bs   = a->bs;
135082799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
135182799104SHong Zhang 
135249b5e25fSSatish Balay   aa   = a->a;
135349b5e25fSSatish Balay   ai   = a->i;
135449b5e25fSSatish Balay   aj   = a->j;
135549b5e25fSSatish Balay   ambs = a->mbs;
135649b5e25fSSatish Balay   bs2  = a->bs2;
135749b5e25fSSatish Balay 
135849b5e25fSSatish Balay   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1359b1d4fb26SBarry Smith   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
136049b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1361ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
136249b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
136349b5e25fSSatish Balay     j=ai[i];
136449b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
136549b5e25fSSatish Balay       row  = i*bs;
136649b5e25fSSatish Balay       aa_j = aa + j*bs2;
136782799104SHong Zhang       if (A->factor && bs==1){
136882799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
136982799104SHong Zhang       } else {
137049b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
137149b5e25fSSatish Balay       }
137249b5e25fSSatish Balay     }
137382799104SHong Zhang   }
137482799104SHong Zhang 
1375b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
137649b5e25fSSatish Balay   PetscFunctionReturn(0);
137749b5e25fSSatish Balay }
137849b5e25fSSatish Balay 
13794a2ae208SSatish Balay #undef __FUNCT__
13804a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
138149b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
138249b5e25fSSatish Balay {
138349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
138487828ca2SBarry Smith   PetscScalar  *l,*r,x,*li,*ri;
138549b5e25fSSatish Balay   MatScalar    *aa,*v;
1386066653e3SSatish Balay   int          ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;
138749b5e25fSSatish Balay 
138849b5e25fSSatish Balay   PetscFunctionBegin;
138949b5e25fSSatish Balay   ai  = a->i;
139049b5e25fSSatish Balay   aj  = a->j;
139149b5e25fSSatish Balay   aa  = a->a;
1392ef511fbeSHong Zhang   m   = A->m;
139349b5e25fSSatish Balay   bs  = a->bs;
139449b5e25fSSatish Balay   mbs = a->mbs;
139549b5e25fSSatish Balay   bs2 = a->bs2;
139649b5e25fSSatish Balay 
139749b5e25fSSatish Balay   if (ll != rr) {
1398347d480fSBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
139949b5e25fSSatish Balay   }
140049b5e25fSSatish Balay   if (ll) {
1401b1d4fb26SBarry Smith     ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr);
140249b5e25fSSatish Balay     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1403347d480fSBarry Smith     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
140449b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
140549b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
140649b5e25fSSatish Balay       li = l + i*bs;
140749b5e25fSSatish Balay       v  = aa + bs2*ai[i];
140849b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
140949b5e25fSSatish Balay         for (k=0; k<bs2; k++) {
141049b5e25fSSatish Balay           (*v++) *= li[k%bs];
141149b5e25fSSatish Balay         }
141249b5e25fSSatish Balay #ifdef CONT
141349b5e25fSSatish Balay         /* will be used to replace the above loop */
141449b5e25fSSatish Balay         ri = l + bs*aj[ai[i]+j];
141549b5e25fSSatish Balay         for (k=0; k<bs; k++) { /* column value */
141649b5e25fSSatish Balay           x = ri[k];
141749b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
141849b5e25fSSatish Balay         }
141949b5e25fSSatish Balay #endif
142049b5e25fSSatish Balay 
142149b5e25fSSatish Balay       }
142249b5e25fSSatish Balay     }
1423b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr);
14246c6c5352SBarry Smith     PetscLogFlops(2*a->nz);
142549b5e25fSSatish Balay   }
142649b5e25fSSatish Balay   /* will be deleted */
142749b5e25fSSatish Balay   if (rr) {
1428b1d4fb26SBarry Smith     ierr = VecGetArrayFast(rr,&r);CHKERRQ(ierr);
142949b5e25fSSatish Balay     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1430347d480fSBarry Smith     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
143149b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
143249b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
143349b5e25fSSatish Balay       v  = aa + bs2*ai[i];
143449b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
143549b5e25fSSatish Balay         ri = r + bs*aj[ai[i]+j];
143649b5e25fSSatish Balay         for (k=0; k<bs; k++) {
143749b5e25fSSatish Balay           x = ri[k];
143849b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
143949b5e25fSSatish Balay         }
144049b5e25fSSatish Balay       }
144149b5e25fSSatish Balay     }
1442b1d4fb26SBarry Smith     ierr = VecRestoreArrayFast(rr,&r);CHKERRQ(ierr);
14436c6c5352SBarry Smith     PetscLogFlops(a->nz);
144449b5e25fSSatish Balay   }
144549b5e25fSSatish Balay   PetscFunctionReturn(0);
144649b5e25fSSatish Balay }
144749b5e25fSSatish Balay 
14484a2ae208SSatish Balay #undef __FUNCT__
14494a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
145049b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
145149b5e25fSSatish Balay {
145249b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
145349b5e25fSSatish Balay 
145449b5e25fSSatish Balay   PetscFunctionBegin;
1455ef511fbeSHong Zhang   info->rows_global    = (double)A->m;
1456ef511fbeSHong Zhang   info->columns_global = (double)A->m;
1457ef511fbeSHong Zhang   info->rows_local     = (double)A->m;
1458ef511fbeSHong Zhang   info->columns_local  = (double)A->m;
145949b5e25fSSatish Balay   info->block_size     = a->bs2;
14606c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
14616c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
146249b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
146349b5e25fSSatish Balay   info->assemblies   = A->num_ass;
146449b5e25fSSatish Balay   info->mallocs      = a->reallocs;
146549b5e25fSSatish Balay   info->memory       = A->mem;
146649b5e25fSSatish Balay   if (A->factor) {
146749b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
146849b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
146949b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
147049b5e25fSSatish Balay   } else {
147149b5e25fSSatish Balay     info->fill_ratio_given  = 0;
147249b5e25fSSatish Balay     info->fill_ratio_needed = 0;
147349b5e25fSSatish Balay     info->factor_mallocs    = 0;
147449b5e25fSSatish Balay   }
147549b5e25fSSatish Balay   PetscFunctionReturn(0);
147649b5e25fSSatish Balay }
147749b5e25fSSatish Balay 
147849b5e25fSSatish Balay 
14794a2ae208SSatish Balay #undef __FUNCT__
14804a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
148149b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A)
148249b5e25fSSatish Balay {
148349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
148449b5e25fSSatish Balay   int         ierr;
148549b5e25fSSatish Balay 
148649b5e25fSSatish Balay   PetscFunctionBegin;
148749b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
148849b5e25fSSatish Balay   PetscFunctionReturn(0);
148949b5e25fSSatish Balay }
1490dc354874SHong Zhang 
14914a2ae208SSatish Balay #undef __FUNCT__
14924a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ"
1493dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1494dc354874SHong Zhang {
1495dc354874SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1496d0f6400bSHong Zhang   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1497c3fca9a7SHong Zhang   PetscReal    atmp;
1498273d9f13SBarry Smith   MatScalar    *aa;
149987828ca2SBarry Smith   PetscScalar  zero = 0.0,*x;
1500d0f6400bSHong Zhang   int          ncols,brow,bcol,krow,kcol;
1501dc354874SHong Zhang 
1502dc354874SHong Zhang   PetscFunctionBegin;
1503dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1504dc354874SHong Zhang   bs   = a->bs;
1505dc354874SHong Zhang   aa   = a->a;
1506dc354874SHong Zhang   ai   = a->i;
1507dc354874SHong Zhang   aj   = a->j;
150844117c81SHong Zhang   mbs = a->mbs;
1509dc354874SHong Zhang 
1510dc354874SHong Zhang   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1511b1d4fb26SBarry Smith   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
1512dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1513ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
151444117c81SHong Zhang   for (i=0; i<mbs; i++) {
1515d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1516d0f6400bSHong Zhang     brow  = bs*i;
151744117c81SHong Zhang     for (j=0; j<ncols; j++){
1518d0f6400bSHong Zhang       bcol = bs*(*aj);
151944117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1520d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
152144117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1522d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1523d0f6400bSHong Zhang           row = brow + krow;    /* row index */
152444117c81SHong Zhang           /* printf("val[%d,%d]: %g\n",row,col,atmp); */
1525c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1526c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
152744117c81SHong Zhang         }
152844117c81SHong Zhang       }
1529d0f6400bSHong Zhang       aj++;
1530dc354874SHong Zhang     }
1531dc354874SHong Zhang   }
1532b1d4fb26SBarry Smith   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
1533dc354874SHong Zhang   PetscFunctionReturn(0);
1534dc354874SHong Zhang }
1535