xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision dc0b31ed242eb6637dca0468fbfa12899dc55c4b)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
47c4f633dSBarry Smith #include "../src/inline/spops.h"
57c4f633dSBarry Smith #include "../src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
77c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
155d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
165d0c19d7SBarry Smith   const PetscInt *idx;
17521d7252SBarry Smith   PetscBT        table,table0;
18d94109b8SHong Zhang 
19d94109b8SHong Zhang   PetscFunctionBegin;
20b3bf805bSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
21d94109b8SHong Zhang   mbs = a->mbs;
22d94109b8SHong Zhang   ai  = a->i;
23d94109b8SHong Zhang   aj  = a->j;
24d0f46423SBarry Smith   bs  = A->rmap->bs;
25d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2613f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
27d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
28d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
29d94109b8SHong Zhang 
30d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
31d94109b8SHong Zhang     isz  = 0;
32d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
33d94109b8SHong Zhang 
34d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
35d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37d94109b8SHong Zhang 
38d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
39dbe03f88SHong Zhang     bcol_max = 0;
40d94109b8SHong Zhang     for (j=0; j<n ; ++j){
41d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
42d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
44dbe03f88SHong Zhang         nidx[isz++] = brow;
45dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
46dbe03f88SHong Zhang       }
47d94109b8SHong Zhang     }
48d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
49d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
50d94109b8SHong Zhang 
51d94109b8SHong Zhang     k = 0;
52d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
53d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
54d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
55efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
56d94109b8SHong Zhang 
57d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
58d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
59d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
60d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
61d94109b8SHong Zhang           for (l = start; l<end ; l++){
62d94109b8SHong Zhang             bcol = aj[l];
63d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
64d94109b8SHong Zhang           }
65d94109b8SHong Zhang           k++;
66d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
67d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
68d94109b8SHong Zhang           for (l = start; l<end ; l++){
69d94109b8SHong Zhang             bcol = aj[l];
70dbe03f88SHong Zhang             if (bcol > bcol_max) break;
71d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
72d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
73d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
74d94109b8SHong Zhang             }
75d94109b8SHong Zhang           }
76d94109b8SHong Zhang         }
77d94109b8SHong Zhang       }
78d94109b8SHong Zhang     } /* for each overlap */
79d94109b8SHong Zhang 
80d94109b8SHong Zhang     /* expand the Index Set */
81d94109b8SHong Zhang     for (j=0; j<isz; j++) {
82d94109b8SHong Zhang       for (k=0; k<bs; k++)
83d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
84d94109b8SHong Zhang     }
85d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
86d94109b8SHong Zhang   }
87d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
915eee224dSHong Zhang   PetscFunctionReturn(0);
9249b5e25fSSatish Balay }
9349b5e25fSSatish Balay 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
9613f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
9749b5e25fSSatish Balay {
9849b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
996849ba73SBarry Smith   PetscErrorCode ierr;
10013f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10113f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1025d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1035d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10449b5e25fSSatish Balay   MatScalar      *mat_a;
10549b5e25fSSatish Balay   Mat            C;
10614ca34e6SBarry Smith   PetscTruth     flag,sorted;
10749b5e25fSSatish Balay 
10849b5e25fSSatish Balay   PetscFunctionBegin;
109e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
11014ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
11114ca34e6SBarry Smith   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11249b5e25fSSatish Balay 
11349b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11449b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11549b5e25fSSatish Balay 
11613f74950SBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11749b5e25fSSatish Balay   ssmap = smap;
11813f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11913f74950SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
12049b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12149b5e25fSSatish Balay   /* determine lens of each row */
12249b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12349b5e25fSSatish Balay     kstart  = ai[irow[i]];
12449b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12549b5e25fSSatish Balay     lens[i] = 0;
12649b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12749b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12849b5e25fSSatish Balay           lens[i]++;
12949b5e25fSSatish Balay         }
13049b5e25fSSatish Balay       }
13149b5e25fSSatish Balay     }
13249b5e25fSSatish Balay   /* Create and fill new matrix */
13349b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13449b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13549b5e25fSSatish Balay 
136d0f46423SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13713f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
138abc0a331SBarry Smith     if (!flag) {
139347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
14049b5e25fSSatish Balay     }
14113f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14249b5e25fSSatish Balay     C = *B;
14349b5e25fSSatish Balay   } else {
1447adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
145f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1467adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
147ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14849b5e25fSSatish Balay   }
14949b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
15049b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15149b5e25fSSatish Balay     row    = irow[i];
15249b5e25fSSatish Balay     kstart = ai[row];
15349b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15449b5e25fSSatish Balay     mat_i  = c->i[i];
15549b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15649b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15749b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15849b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15949b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
16049b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16149b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16249b5e25fSSatish Balay         mat_a   += bs2;
16349b5e25fSSatish Balay         (*mat_ilen)++;
16449b5e25fSSatish Balay       }
16549b5e25fSSatish Balay     }
16649b5e25fSSatish Balay   }
16749b5e25fSSatish Balay 
16849b5e25fSSatish Balay   /* Free work space */
16949b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17349b5e25fSSatish Balay 
17449b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17549b5e25fSSatish Balay   *B = C;
17649b5e25fSSatish Balay   PetscFunctionReturn(0);
17749b5e25fSSatish Balay }
17849b5e25fSSatish Balay 
1794a2ae208SSatish Balay #undef __FUNCT__
1804a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
18113f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
18249b5e25fSSatish Balay {
18349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18449b5e25fSSatish Balay   IS             is1;
1856849ba73SBarry Smith   PetscErrorCode ierr;
1865d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1875d0c19d7SBarry Smith   const PetscInt *irow;
18849b5e25fSSatish Balay 
18949b5e25fSSatish Balay   PetscFunctionBegin;
190e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
19149b5e25fSSatish Balay 
19249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19449b5e25fSSatish Balay 
19549b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19649b5e25fSSatish Balay    and form the IS with compressed IS */
19713f74950SBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
19849b5e25fSSatish Balay   iary = vary + a->mbs;
19913f74950SBarry Smith   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
20049b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
20149b5e25fSSatish Balay 
20249b5e25fSSatish Balay   count = 0;
20349b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
204e005ede5SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20549b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20649b5e25fSSatish Balay   }
20749b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20849b5e25fSSatish Balay 
20949b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
21049b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
21149b5e25fSSatish Balay 
21249b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
21349b5e25fSSatish Balay   ISDestroy(is1);
21449b5e25fSSatish Balay   PetscFunctionReturn(0);
21549b5e25fSSatish Balay }
21649b5e25fSSatish Balay 
2174a2ae208SSatish Balay #undef __FUNCT__
2184a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21913f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
22049b5e25fSSatish Balay {
2216849ba73SBarry Smith   PetscErrorCode ierr;
22213f74950SBarry Smith   PetscInt       i;
22349b5e25fSSatish Balay 
22449b5e25fSSatish Balay   PetscFunctionBegin;
22549b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22682502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22749b5e25fSSatish Balay   }
22849b5e25fSSatish Balay 
22949b5e25fSSatish Balay   for (i=0; i<n; i++) {
23049b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
23149b5e25fSSatish Balay   }
23249b5e25fSSatish Balay   PetscFunctionReturn(0);
23349b5e25fSSatish Balay }
23449b5e25fSSatish Balay 
23549b5e25fSSatish Balay /* -------------------------------------------------------*/
23649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23749b5e25fSSatish Balay /* -------------------------------------------------------*/
238d9eff348SSatish Balay #include "petscblaslapack.h"
23949b5e25fSSatish Balay 
2404a2ae208SSatish Balay #undef __FUNCT__
2414a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
242dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
24349b5e25fSSatish Balay {
24449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,zero=0.0;
24649b5e25fSSatish Balay   MatScalar      *v;
2476849ba73SBarry Smith   PetscErrorCode ierr;
24813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
25049b5e25fSSatish Balay 
25149b5e25fSSatish Balay   PetscFunctionBegin;
2522dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2531ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2541ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25549b5e25fSSatish Balay 
25649b5e25fSSatish Balay   v  = a->a;
25749b5e25fSSatish Balay   xb = x;
25849b5e25fSSatish Balay 
25949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
26049b5e25fSSatish Balay     n    = ai[1] - ai[0];  /* length of i_th row of A */
2613cb60589SBarry Smith     x1   = *xb++;
2623cb60589SBarry Smith     ib   = aj + *ai++;
263831a3094SHong Zhang     jmin = 0;
26498c9bda7SSatish Balay     nonzerorow += (n>0);
2653cb60589SBarry Smith     /* if we ALWAYS required a diagonal entry then could remove this if test */
2663cb60589SBarry Smith     /* should we use a tmp to hold the accumulated z[i] */
267831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
268831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
269831a3094SHong Zhang       jmin++;
270831a3094SHong Zhang     }
271831a3094SHong Zhang     for (j=jmin; j<n; j++) {
27249b5e25fSSatish Balay       cval    = *ib;
27349b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
27449b5e25fSSatish Balay       z[i]    += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
27549b5e25fSSatish Balay     }
27649b5e25fSSatish Balay   }
27749b5e25fSSatish Balay 
2781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
280*dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);  /* nz = (nz+m)/2 */
28149b5e25fSSatish Balay   PetscFunctionReturn(0);
28249b5e25fSSatish Balay }
28349b5e25fSSatish Balay 
2844a2ae208SSatish Balay #undef __FUNCT__
2854a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
286dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
28749b5e25fSSatish Balay {
28849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
28987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
29049b5e25fSSatish Balay   MatScalar      *v;
2916849ba73SBarry Smith   PetscErrorCode ierr;
29213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29449b5e25fSSatish Balay 
29549b5e25fSSatish Balay   PetscFunctionBegin;
2962dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2971ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
29949b5e25fSSatish Balay 
30049b5e25fSSatish Balay   v     = a->a;
30149b5e25fSSatish Balay   xb = x;
30249b5e25fSSatish Balay 
30349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
30649b5e25fSSatish Balay     ib = aj + *ai;
307831a3094SHong Zhang     jmin = 0;
30898c9bda7SSatish Balay     nonzerorow += (n>0);
3097fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
31049b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
31149b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
312831a3094SHong Zhang       v += 4; jmin++;
3137fbae186SHong Zhang     }
314831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31649b5e25fSSatish Balay       cval       = ib[j]*2;
31749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
31849b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
31949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32049b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
32149b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
32249b5e25fSSatish Balay       v  += 4;
32349b5e25fSSatish Balay     }
32449b5e25fSSatish Balay     xb +=2; ai++;
32549b5e25fSSatish Balay   }
32649b5e25fSSatish Balay 
3271ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3281ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
329*dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33049b5e25fSSatish Balay   PetscFunctionReturn(0);
33149b5e25fSSatish Balay }
33249b5e25fSSatish Balay 
3334a2ae208SSatish Balay #undef __FUNCT__
3344a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
335dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
33649b5e25fSSatish Balay {
33749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
33887828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
33949b5e25fSSatish Balay   MatScalar      *v;
3406849ba73SBarry Smith   PetscErrorCode ierr;
34113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
34349b5e25fSSatish Balay 
34449b5e25fSSatish Balay   PetscFunctionBegin;
3452dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3461ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34849b5e25fSSatish Balay 
34949b5e25fSSatish Balay   v    = a->a;
35049b5e25fSSatish Balay   xb   = x;
35149b5e25fSSatish Balay 
35249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
35349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
35449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
35549b5e25fSSatish Balay     ib = aj + *ai;
356831a3094SHong Zhang     jmin = 0;
35798c9bda7SSatish Balay     nonzerorow += (n>0);
3587fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
35949b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
36049b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
36149b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
362831a3094SHong Zhang       v += 9; jmin++;
3637fbae186SHong Zhang     }
364831a3094SHong Zhang     for (j=jmin; j<n; j++) {
36549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
36649b5e25fSSatish Balay       cval       = ib[j]*3;
36749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
36849b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
36949b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
37049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
37149b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
37249b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
37349b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
37449b5e25fSSatish Balay       v  += 9;
37549b5e25fSSatish Balay     }
37649b5e25fSSatish Balay     xb +=3; ai++;
37749b5e25fSSatish Balay   }
37849b5e25fSSatish Balay 
3791ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3801ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
381*dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
38249b5e25fSSatish Balay   PetscFunctionReturn(0);
38349b5e25fSSatish Balay }
38449b5e25fSSatish Balay 
3854a2ae208SSatish Balay #undef __FUNCT__
3864a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
387dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
38849b5e25fSSatish Balay {
38949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
39087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
39149b5e25fSSatish Balay   MatScalar      *v;
3926849ba73SBarry Smith   PetscErrorCode ierr;
39313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
39498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
39549b5e25fSSatish Balay 
39649b5e25fSSatish Balay   PetscFunctionBegin;
3972dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
40049b5e25fSSatish Balay 
40149b5e25fSSatish Balay   v     = a->a;
40249b5e25fSSatish Balay   xb = x;
40349b5e25fSSatish Balay 
40449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
40549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
40649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
40749b5e25fSSatish Balay     ib = aj + *ai;
408831a3094SHong Zhang     jmin = 0;
40998c9bda7SSatish Balay     nonzerorow += (n>0);
4107fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
41149b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
41249b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
41349b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
41449b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
415831a3094SHong Zhang       v += 16; jmin++;
4167fbae186SHong Zhang     }
417831a3094SHong Zhang     for (j=jmin; j<n; j++) {
41849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
41949b5e25fSSatish Balay       cval       = ib[j]*4;
42049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
42149b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
42249b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
42349b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
42449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
42549b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
42649b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
42749b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
42849b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
42949b5e25fSSatish Balay       v  += 16;
43049b5e25fSSatish Balay     }
43149b5e25fSSatish Balay     xb +=4; ai++;
43249b5e25fSSatish Balay   }
43349b5e25fSSatish Balay 
4341ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4351ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
436*dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
43749b5e25fSSatish Balay   PetscFunctionReturn(0);
43849b5e25fSSatish Balay }
43949b5e25fSSatish Balay 
4404a2ae208SSatish Balay #undef __FUNCT__
4414a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
442dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
44349b5e25fSSatish Balay {
44449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
44587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
44649b5e25fSSatish Balay   MatScalar      *v;
4476849ba73SBarry Smith   PetscErrorCode ierr;
44813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
44998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
45049b5e25fSSatish Balay 
45149b5e25fSSatish Balay   PetscFunctionBegin;
4522dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4531ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4541ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
45549b5e25fSSatish Balay 
45649b5e25fSSatish Balay   v     = a->a;
45749b5e25fSSatish Balay   xb = x;
45849b5e25fSSatish Balay 
45949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
46049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
46149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
46249b5e25fSSatish Balay     ib = aj + *ai;
463831a3094SHong Zhang     jmin = 0;
46498c9bda7SSatish Balay     nonzerorow += (n>0);
4657fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
46649b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
46749b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
46849b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
46949b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
47049b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
471831a3094SHong Zhang       v += 25; jmin++;
4727fbae186SHong Zhang     }
473831a3094SHong Zhang     for (j=jmin; j<n; j++) {
47449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
47549b5e25fSSatish Balay       cval       = ib[j]*5;
47649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
47749b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
47849b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
47949b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
48049b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
48149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
48249b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
48349b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
48449b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
48549b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
48649b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
48749b5e25fSSatish Balay       v  += 25;
48849b5e25fSSatish Balay     }
48949b5e25fSSatish Balay     xb +=5; ai++;
49049b5e25fSSatish Balay   }
49149b5e25fSSatish Balay 
4921ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4931ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
494*dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
49549b5e25fSSatish Balay   PetscFunctionReturn(0);
49649b5e25fSSatish Balay }
49749b5e25fSSatish Balay 
49849b5e25fSSatish Balay 
4994a2ae208SSatish Balay #undef __FUNCT__
5004a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
501dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
50249b5e25fSSatish Balay {
50349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
50487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
50549b5e25fSSatish Balay   MatScalar      *v;
5066849ba73SBarry Smith   PetscErrorCode ierr;
50713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
50898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
50949b5e25fSSatish Balay 
51049b5e25fSSatish Balay   PetscFunctionBegin;
5112dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5121ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5131ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
51449b5e25fSSatish Balay 
51549b5e25fSSatish Balay   v     = a->a;
51649b5e25fSSatish Balay   xb = x;
51749b5e25fSSatish Balay 
51849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
51949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
52049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
52149b5e25fSSatish Balay     ib = aj + *ai;
522831a3094SHong Zhang     jmin = 0;
52398c9bda7SSatish Balay     nonzerorow += (n>0);
5247fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
52549b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
52649b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
52749b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
52849b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
52949b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
53049b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
531831a3094SHong Zhang       v += 36; jmin++;
5327fbae186SHong Zhang     }
533831a3094SHong Zhang     for (j=jmin; j<n; j++) {
53449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
53549b5e25fSSatish Balay       cval       = ib[j]*6;
53649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
53749b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
53849b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
53949b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
54049b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
54149b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
54249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
54349b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
54449b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
54549b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
54649b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
54749b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
54849b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
54949b5e25fSSatish Balay       v  += 36;
55049b5e25fSSatish Balay     }
55149b5e25fSSatish Balay     xb +=6; ai++;
55249b5e25fSSatish Balay   }
55349b5e25fSSatish Balay 
5541ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5551ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
556*dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
55749b5e25fSSatish Balay   PetscFunctionReturn(0);
55849b5e25fSSatish Balay }
5594a2ae208SSatish Balay #undef __FUNCT__
5604a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
561dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
56249b5e25fSSatish Balay {
56349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
56487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
56549b5e25fSSatish Balay   MatScalar      *v;
5666849ba73SBarry Smith   PetscErrorCode ierr;
56713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
56898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
56949b5e25fSSatish Balay 
57049b5e25fSSatish Balay   PetscFunctionBegin;
5712dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5721ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5731ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
57449b5e25fSSatish Balay 
57549b5e25fSSatish Balay   v     = a->a;
57649b5e25fSSatish Balay   xb = x;
57749b5e25fSSatish Balay 
57849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
57949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
58049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
58149b5e25fSSatish Balay     ib = aj + *ai;
582831a3094SHong Zhang     jmin = 0;
58398c9bda7SSatish Balay     nonzerorow += (n>0);
5847fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
58549b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
58649b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
58749b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
58849b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
58949b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
59049b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
59149b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
592831a3094SHong Zhang       v += 49; jmin++;
5937fbae186SHong Zhang     }
594831a3094SHong Zhang     for (j=jmin; j<n; j++) {
59549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
59649b5e25fSSatish Balay       cval       = ib[j]*7;
59749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
59849b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
59949b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
60049b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
60149b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
60249b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
60349b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
60449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
60549b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
60649b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
60749b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
60849b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
60949b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
61049b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
61149b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
61249b5e25fSSatish Balay       v  += 49;
61349b5e25fSSatish Balay     }
61449b5e25fSSatish Balay     xb +=7; ai++;
61549b5e25fSSatish Balay   }
6161ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6171ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
618*dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
61949b5e25fSSatish Balay   PetscFunctionReturn(0);
62049b5e25fSSatish Balay }
62149b5e25fSSatish Balay 
62249b5e25fSSatish Balay /*
62349b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
62449b5e25fSSatish Balay */
6254a2ae208SSatish Balay #undef __FUNCT__
6264a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
627dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
62849b5e25fSSatish Balay {
62949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
63087828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6310b60a74dSHong Zhang   MatScalar      *v;
632dfbe8321SBarry Smith   PetscErrorCode ierr;
633d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
63498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
63549b5e25fSSatish Balay 
63649b5e25fSSatish Balay   PetscFunctionBegin;
6372dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6381ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6391ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
64049b5e25fSSatish Balay 
64149b5e25fSSatish Balay   aj   = a->j;
64249b5e25fSSatish Balay   v    = a->a;
64349b5e25fSSatish Balay   ii   = a->i;
64449b5e25fSSatish Balay 
64549b5e25fSSatish Balay   if (!a->mult_work) {
646d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
64749b5e25fSSatish Balay   }
64849b5e25fSSatish Balay   work = a->mult_work;
64949b5e25fSSatish Balay 
65049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
65149b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
65249b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
65398c9bda7SSatish Balay     nonzerorow += (n>0);
65449b5e25fSSatish Balay 
65549b5e25fSSatish Balay     /* upper triangular part */
65649b5e25fSSatish Balay     for (j=0; j<n; j++) {
65749b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
65849b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
65949b5e25fSSatish Balay       workt += bs;
66049b5e25fSSatish Balay     }
66149b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
66249b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
66349b5e25fSSatish Balay 
66449b5e25fSSatish Balay     /* strict lower triangular part */
665831a3094SHong Zhang     idx = aj+ii[0];
666831a3094SHong Zhang     if (*idx == i){
66796b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
668831a3094SHong Zhang     }
66996b9376eSHong Zhang 
67049b5e25fSSatish Balay     if (ncols > 0){
67149b5e25fSSatish Balay       workt = work;
67287828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
673831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
674831a3094SHong Zhang       for (j=0; j<n; j++) {
675831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
67649b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
67749b5e25fSSatish Balay         workt += bs;
67849b5e25fSSatish Balay       }
67949b5e25fSSatish Balay     }
68049b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
68149b5e25fSSatish Balay   }
68249b5e25fSSatish Balay 
6831ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6841ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
685*dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
68649b5e25fSSatish Balay   PetscFunctionReturn(0);
68749b5e25fSSatish Balay }
68849b5e25fSSatish Balay 
6896a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6904a2ae208SSatish Balay #undef __FUNCT__
6914a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
692dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
69349b5e25fSSatish Balay {
69449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
695bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
69649b5e25fSSatish Balay   MatScalar      *v;
6976849ba73SBarry Smith   PetscErrorCode ierr;
69813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
69998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
70049b5e25fSSatish Balay 
70149b5e25fSSatish Balay   PetscFunctionBegin;
7026a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7031ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7041ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70549b5e25fSSatish Balay   v  = a->a;
70649b5e25fSSatish Balay   xb = x;
70749b5e25fSSatish Balay 
70849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70949b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
71049b5e25fSSatish Balay     x1 = xb[0];
71149b5e25fSSatish Balay     ib = aj + *ai;
712831a3094SHong Zhang     jmin = 0;
71398c9bda7SSatish Balay     nonzerorow += (n>0);
714831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
715831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
716831a3094SHong Zhang     }
717831a3094SHong Zhang     for (j=jmin; j<n; j++) {
71849b5e25fSSatish Balay       cval    = *ib;
71949b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
72049b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
72149b5e25fSSatish Balay     }
72249b5e25fSSatish Balay     xb++; ai++;
72349b5e25fSSatish Balay   }
72449b5e25fSSatish Balay 
7251ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
726bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
72749b5e25fSSatish Balay 
728*dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
72949b5e25fSSatish Balay   PetscFunctionReturn(0);
73049b5e25fSSatish Balay }
73149b5e25fSSatish Balay 
7324a2ae208SSatish Balay #undef __FUNCT__
7334a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
734dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
73549b5e25fSSatish Balay {
73649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
737bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
73849b5e25fSSatish Balay   MatScalar      *v;
7396849ba73SBarry Smith   PetscErrorCode ierr;
74013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
74198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
74249b5e25fSSatish Balay 
74349b5e25fSSatish Balay   PetscFunctionBegin;
7446a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7451ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7461ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
74749b5e25fSSatish Balay 
74849b5e25fSSatish Balay   v  = a->a;
74949b5e25fSSatish Balay   xb = x;
75049b5e25fSSatish Balay 
75149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
75249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
75349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
75449b5e25fSSatish Balay     ib = aj + *ai;
755831a3094SHong Zhang     jmin = 0;
75698c9bda7SSatish Balay     nonzerorow += (n>0);
7577fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
75849b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
75949b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
760831a3094SHong Zhang       v += 4; jmin++;
7617fbae186SHong Zhang     }
762831a3094SHong Zhang     for (j=jmin; j<n; j++) {
76349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
76449b5e25fSSatish Balay       cval       = ib[j]*2;
76549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
76649b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
76749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
76849b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
76949b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
77049b5e25fSSatish Balay       v  += 4;
77149b5e25fSSatish Balay     }
77249b5e25fSSatish Balay     xb +=2; ai++;
77349b5e25fSSatish Balay   }
7741ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
775bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
77649b5e25fSSatish Balay 
777*dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
77849b5e25fSSatish Balay   PetscFunctionReturn(0);
77949b5e25fSSatish Balay }
78049b5e25fSSatish Balay 
7814a2ae208SSatish Balay #undef __FUNCT__
7824a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
783dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
78449b5e25fSSatish Balay {
78549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
786bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
78749b5e25fSSatish Balay   MatScalar      *v;
7886849ba73SBarry Smith   PetscErrorCode ierr;
78913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
79098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
79149b5e25fSSatish Balay 
79249b5e25fSSatish Balay   PetscFunctionBegin;
7936a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7941ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7951ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
79649b5e25fSSatish Balay 
79749b5e25fSSatish Balay   v     = a->a;
79849b5e25fSSatish Balay   xb = x;
79949b5e25fSSatish Balay 
80049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
80149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
80249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
80349b5e25fSSatish Balay     ib = aj + *ai;
804831a3094SHong Zhang     jmin = 0;
80598c9bda7SSatish Balay     nonzerorow += (n>0);
8067fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
80749b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
80849b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
80949b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
810831a3094SHong Zhang      v += 9; jmin++;
8117fbae186SHong Zhang     }
812831a3094SHong Zhang     for (j=jmin; j<n; j++) {
81349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
81449b5e25fSSatish Balay       cval       = ib[j]*3;
81549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
81649b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
81749b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
81849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
81949b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
82049b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
82149b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
82249b5e25fSSatish Balay       v  += 9;
82349b5e25fSSatish Balay     }
82449b5e25fSSatish Balay     xb +=3; ai++;
82549b5e25fSSatish Balay   }
82649b5e25fSSatish Balay 
8271ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
828bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
82949b5e25fSSatish Balay 
830*dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
83149b5e25fSSatish Balay   PetscFunctionReturn(0);
83249b5e25fSSatish Balay }
83349b5e25fSSatish Balay 
8344a2ae208SSatish Balay #undef __FUNCT__
8354a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
836dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
83749b5e25fSSatish Balay {
83849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
839bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
84049b5e25fSSatish Balay   MatScalar      *v;
8416849ba73SBarry Smith   PetscErrorCode ierr;
84213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
84398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
84449b5e25fSSatish Balay 
84549b5e25fSSatish Balay   PetscFunctionBegin;
8466a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8471ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
84949b5e25fSSatish Balay 
85049b5e25fSSatish Balay   v     = a->a;
85149b5e25fSSatish Balay   xb = x;
85249b5e25fSSatish Balay 
85349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
85449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
85549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
85649b5e25fSSatish Balay     ib = aj + *ai;
857831a3094SHong Zhang     jmin = 0;
85898c9bda7SSatish Balay     nonzerorow += (n>0);
8597fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
86049b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
86149b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
86249b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
86349b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
864831a3094SHong Zhang       v += 16; jmin++;
8657fbae186SHong Zhang     }
866831a3094SHong Zhang     for (j=jmin; j<n; j++) {
86749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
86849b5e25fSSatish Balay       cval       = ib[j]*4;
86949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
87049b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
87149b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
87249b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
87349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
87449b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
87549b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
87649b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
87749b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
87849b5e25fSSatish Balay       v  += 16;
87949b5e25fSSatish Balay     }
88049b5e25fSSatish Balay     xb +=4; ai++;
88149b5e25fSSatish Balay   }
88249b5e25fSSatish Balay 
8831ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
884bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
88549b5e25fSSatish Balay 
886*dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
88749b5e25fSSatish Balay   PetscFunctionReturn(0);
88849b5e25fSSatish Balay }
88949b5e25fSSatish Balay 
8904a2ae208SSatish Balay #undef __FUNCT__
8914a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
892dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
89349b5e25fSSatish Balay {
89449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
895bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
89649b5e25fSSatish Balay   MatScalar      *v;
8976849ba73SBarry Smith   PetscErrorCode ierr;
89813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
89998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
90049b5e25fSSatish Balay 
90149b5e25fSSatish Balay   PetscFunctionBegin;
9026a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9031ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9041ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
90549b5e25fSSatish Balay 
90649b5e25fSSatish Balay   v     = a->a;
90749b5e25fSSatish Balay   xb = x;
90849b5e25fSSatish Balay 
90949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
91049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
91149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
91249b5e25fSSatish Balay     ib = aj + *ai;
913831a3094SHong Zhang     jmin = 0;
91498c9bda7SSatish Balay     nonzerorow += (n>0);
9157fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
91649b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
91749b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
91849b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
91949b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
92049b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
921831a3094SHong Zhang       v += 25; jmin++;
9227fbae186SHong Zhang     }
923831a3094SHong Zhang     for (j=jmin; j<n; j++) {
92449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
92549b5e25fSSatish Balay       cval       = ib[j]*5;
92649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
92749b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
92849b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
92949b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
93049b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
93149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
93249b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
93349b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
93449b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
93549b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
93649b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
93749b5e25fSSatish Balay       v  += 25;
93849b5e25fSSatish Balay     }
93949b5e25fSSatish Balay     xb +=5; ai++;
94049b5e25fSSatish Balay   }
94149b5e25fSSatish Balay 
9421ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
94449b5e25fSSatish Balay 
945*dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
94649b5e25fSSatish Balay   PetscFunctionReturn(0);
94749b5e25fSSatish Balay }
9484a2ae208SSatish Balay #undef __FUNCT__
9494a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
950dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
95149b5e25fSSatish Balay {
95249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
953bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
95449b5e25fSSatish Balay   MatScalar      *v;
9556849ba73SBarry Smith   PetscErrorCode ierr;
95613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
95798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
95849b5e25fSSatish Balay 
95949b5e25fSSatish Balay   PetscFunctionBegin;
9606a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9611ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9621ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
96349b5e25fSSatish Balay 
96449b5e25fSSatish Balay   v     = a->a;
96549b5e25fSSatish Balay   xb = x;
96649b5e25fSSatish Balay 
96749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
96849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
96949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
97049b5e25fSSatish Balay     ib = aj + *ai;
971831a3094SHong Zhang     jmin = 0;
97298c9bda7SSatish Balay     nonzerorow += (n>0);
9737fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
97449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
97549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
97649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
97749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
97849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
97949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
980831a3094SHong Zhang       v += 36; jmin++;
9817fbae186SHong Zhang     }
982831a3094SHong Zhang     for (j=jmin; j<n; j++) {
98349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
98449b5e25fSSatish Balay       cval       = ib[j]*6;
98549b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
98649b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
98749b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
98849b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
98949b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
99049b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
99149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
99249b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
99349b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
99449b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
99549b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
99649b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
99749b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
99849b5e25fSSatish Balay       v  += 36;
99949b5e25fSSatish Balay     }
100049b5e25fSSatish Balay     xb +=6; ai++;
100149b5e25fSSatish Balay   }
100249b5e25fSSatish Balay 
10031ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1004bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
100549b5e25fSSatish Balay 
1006*dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
100749b5e25fSSatish Balay   PetscFunctionReturn(0);
100849b5e25fSSatish Balay }
100949b5e25fSSatish Balay 
10104a2ae208SSatish Balay #undef __FUNCT__
10114a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
1012dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
101349b5e25fSSatish Balay {
101449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1015bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
101649b5e25fSSatish Balay   MatScalar      *v;
10176849ba73SBarry Smith   PetscErrorCode ierr;
101813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
101998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
102049b5e25fSSatish Balay 
102149b5e25fSSatish Balay   PetscFunctionBegin;
10226a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10231ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
10241ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
102549b5e25fSSatish Balay 
102649b5e25fSSatish Balay   v     = a->a;
102749b5e25fSSatish Balay   xb = x;
102849b5e25fSSatish Balay 
102949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
103049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
103149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
103249b5e25fSSatish Balay     ib = aj + *ai;
1033831a3094SHong Zhang     jmin = 0;
103498c9bda7SSatish Balay     nonzerorow += (n>0);
10357fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
103649b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
103749b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
103849b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
103949b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
104049b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
104149b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
104249b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1043831a3094SHong Zhang       v += 49; jmin++;
10447fbae186SHong Zhang     }
1045831a3094SHong Zhang     for (j=jmin; j<n; j++) {
104649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
104749b5e25fSSatish Balay       cval       = ib[j]*7;
104849b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
104949b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
105049b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
105149b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
105249b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
105349b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
105449b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
105549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
105649b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
105749b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
105849b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
105949b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
106049b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
106149b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
106249b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
106349b5e25fSSatish Balay       v  += 49;
106449b5e25fSSatish Balay     }
106549b5e25fSSatish Balay     xb +=7; ai++;
106649b5e25fSSatish Balay   }
106749b5e25fSSatish Balay 
10681ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1069bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
107049b5e25fSSatish Balay 
1071*dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
107249b5e25fSSatish Balay   PetscFunctionReturn(0);
107349b5e25fSSatish Balay }
107449b5e25fSSatish Balay 
10754a2ae208SSatish Balay #undef __FUNCT__
10764a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1077dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
107849b5e25fSSatish Balay {
107949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1080bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1081066653e3SSatish Balay   MatScalar      *v;
1082dfbe8321SBarry Smith   PetscErrorCode ierr;
1083d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
108498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
108549b5e25fSSatish Balay 
108649b5e25fSSatish Balay   PetscFunctionBegin;
10876a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10881ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10891ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
109049b5e25fSSatish Balay 
109149b5e25fSSatish Balay   aj   = a->j;
109249b5e25fSSatish Balay   v    = a->a;
109349b5e25fSSatish Balay   ii   = a->i;
109449b5e25fSSatish Balay 
109549b5e25fSSatish Balay   if (!a->mult_work) {
1096d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
109749b5e25fSSatish Balay   }
109849b5e25fSSatish Balay   work = a->mult_work;
109949b5e25fSSatish Balay 
110049b5e25fSSatish Balay 
110149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
110249b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
110349b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
110498c9bda7SSatish Balay     nonzerorow += (n>0);
110549b5e25fSSatish Balay 
110649b5e25fSSatish Balay     /* upper triangular part */
110749b5e25fSSatish Balay     for (j=0; j<n; j++) {
110849b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
110949b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
111049b5e25fSSatish Balay       workt += bs;
111149b5e25fSSatish Balay     }
111249b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
111349b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
111449b5e25fSSatish Balay 
111549b5e25fSSatish Balay     /* strict lower triangular part */
1116831a3094SHong Zhang     idx = aj+ii[0];
1117831a3094SHong Zhang     if (*idx == i){
111896b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1119831a3094SHong Zhang     }
112049b5e25fSSatish Balay     if (ncols > 0){
112149b5e25fSSatish Balay       workt = work;
112287828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1123831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1124831a3094SHong Zhang       for (j=0; j<n; j++) {
1125831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
112649b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
112749b5e25fSSatish Balay         workt += bs;
112849b5e25fSSatish Balay       }
112949b5e25fSSatish Balay     }
113049b5e25fSSatish Balay 
113149b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
113249b5e25fSSatish Balay   }
113349b5e25fSSatish Balay 
11341ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1135bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
113649b5e25fSSatish Balay 
1137*dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
113849b5e25fSSatish Balay   PetscFunctionReturn(0);
113949b5e25fSSatish Balay }
114049b5e25fSSatish Balay 
11414a2ae208SSatish Balay #undef __FUNCT__
11424a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1143f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
114449b5e25fSSatish Balay {
114549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1146f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1147efee365bSSatish Balay   PetscErrorCode ierr;
11480805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
114949b5e25fSSatish Balay 
115049b5e25fSSatish Balay   PetscFunctionBegin;
1151f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1152efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
115349b5e25fSSatish Balay   PetscFunctionReturn(0);
115449b5e25fSSatish Balay }
115549b5e25fSSatish Balay 
11564a2ae208SSatish Balay #undef __FUNCT__
11574a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1158dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
115949b5e25fSSatish Balay {
116049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
116149b5e25fSSatish Balay   MatScalar      *v = a->a;
116249b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1163d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11646849ba73SBarry Smith   PetscErrorCode ierr;
116513f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
116649b5e25fSSatish Balay 
116749b5e25fSSatish Balay   PetscFunctionBegin;
116849b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
116949b5e25fSSatish Balay     for (k=0; k<mbs; k++){
117049b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1171831a3094SHong Zhang       col  = aj + jmin;
1172831a3094SHong Zhang       if (*col == k){         /* diagonal block */
117349b5e25fSSatish Balay         for (i=0; i<bs2; i++){
117449b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
117549b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
117649b5e25fSSatish Balay #else
117749b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
117849b5e25fSSatish Balay #endif
117949b5e25fSSatish Balay         }
1180831a3094SHong Zhang         jmin++;
1181831a3094SHong Zhang       }
1182831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
118349b5e25fSSatish Balay         for (i=0; i<bs2; i++){
118449b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
118549b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
118649b5e25fSSatish Balay #else
118749b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
118849b5e25fSSatish Balay #endif
118949b5e25fSSatish Balay         }
119049b5e25fSSatish Balay       }
119149b5e25fSSatish Balay     }
119249b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11930b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
11940b8dc8d2SHong Zhang     ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr);
11950b8dc8d2SHong Zhang     jl   = il + mbs;
11960b8dc8d2SHong Zhang     sum  = (PetscReal*)(jl + mbs);
11970b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11980b8dc8d2SHong Zhang     il[0] = 0;
119949b5e25fSSatish Balay 
120049b5e25fSSatish Balay     *norm = 0.0;
120149b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
120249b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
120349b5e25fSSatish Balay       /*-- col sum --*/
120449b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1205831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1206831a3094SHong Zhang                   at step k */
120749b5e25fSSatish Balay       while (i<mbs){
120849b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
120949b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
121049b5e25fSSatish Balay         for (j=0; j<bs; j++){
121149b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
121249b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
121349b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
121449b5e25fSSatish Balay           }
121549b5e25fSSatish Balay         }
121649b5e25fSSatish Balay         /* update il, jl */
1217831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1218831a3094SHong Zhang         jmax = a->i[i+1];
121949b5e25fSSatish Balay         if (jmin < jmax){
122049b5e25fSSatish Balay           il[i] = jmin;
122149b5e25fSSatish Balay           j   = a->j[jmin];
122249b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
122349b5e25fSSatish Balay         }
122449b5e25fSSatish Balay         i = nexti;
122549b5e25fSSatish Balay       }
122649b5e25fSSatish Balay       /*-- row sum --*/
122749b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
122849b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
122949b5e25fSSatish Balay         for (j=0; j<bs; j++){
123049b5e25fSSatish Balay           v = a->a + i*bs2 + j;
123149b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
12320b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
123349b5e25fSSatish Balay           }
123449b5e25fSSatish Balay         }
123549b5e25fSSatish Balay       }
123649b5e25fSSatish Balay       /* add k_th block row to il, jl */
1237831a3094SHong Zhang       col = aj+jmin;
1238831a3094SHong Zhang       if (*col == k) jmin++;
123949b5e25fSSatish Balay       if (jmin < jmax){
124049b5e25fSSatish Balay         il[k] = jmin;
12410b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
124249b5e25fSSatish Balay       }
124349b5e25fSSatish Balay       for (j=0; j<bs; j++){
124449b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
124549b5e25fSSatish Balay       }
124649b5e25fSSatish Balay     }
124749b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
124849b5e25fSSatish Balay   } else {
1249347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
125049b5e25fSSatish Balay   }
125149b5e25fSSatish Balay   PetscFunctionReturn(0);
125249b5e25fSSatish Balay }
125349b5e25fSSatish Balay 
12544a2ae208SSatish Balay #undef __FUNCT__
12554a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1256dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
125749b5e25fSSatish Balay {
125849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1259dfbe8321SBarry Smith   PetscErrorCode ierr;
126049b5e25fSSatish Balay 
126149b5e25fSSatish Balay   PetscFunctionBegin;
126249b5e25fSSatish Balay 
126349b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1264d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1265ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1266ef511fbeSHong Zhang     PetscFunctionReturn(0);
126749b5e25fSSatish Balay   }
126849b5e25fSSatish Balay 
126949b5e25fSSatish Balay   /* if the a->i are the same */
127013f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1271abc0a331SBarry Smith   if (!*flg) {
127249b5e25fSSatish Balay     PetscFunctionReturn(0);
127349b5e25fSSatish Balay   }
127449b5e25fSSatish Balay 
127549b5e25fSSatish Balay   /* if a->j are the same */
127613f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1277abc0a331SBarry Smith   if (!*flg) {
127849b5e25fSSatish Balay     PetscFunctionReturn(0);
127949b5e25fSSatish Balay   }
128049b5e25fSSatish Balay   /* if a->a are the same */
1281d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1282935af2e7SHong Zhang   PetscFunctionReturn(0);
128349b5e25fSSatish Balay }
128449b5e25fSSatish Balay 
12854a2ae208SSatish Balay #undef __FUNCT__
12864a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1287dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
128849b5e25fSSatish Balay {
128949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1290dfbe8321SBarry Smith   PetscErrorCode ierr;
129113f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
129287828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
129349b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
129449b5e25fSSatish Balay 
129549b5e25fSSatish Balay   PetscFunctionBegin;
1296d0f46423SBarry Smith   bs   = A->rmap->bs;
129782799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
129882799104SHong Zhang 
129949b5e25fSSatish Balay   aa   = a->a;
130049b5e25fSSatish Balay   ai   = a->i;
130149b5e25fSSatish Balay   aj   = a->j;
130249b5e25fSSatish Balay   ambs = a->mbs;
130349b5e25fSSatish Balay   bs2  = a->bs2;
130449b5e25fSSatish Balay 
13052dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
13061ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
130749b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1308d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
130949b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
131049b5e25fSSatish Balay     j=ai[i];
131149b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
131249b5e25fSSatish Balay       row  = i*bs;
131349b5e25fSSatish Balay       aa_j = aa + j*bs2;
131482799104SHong Zhang       if (A->factor && bs==1){
131582799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
131682799104SHong Zhang       } else {
131749b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
131849b5e25fSSatish Balay       }
131949b5e25fSSatish Balay     }
132082799104SHong Zhang   }
132182799104SHong Zhang 
13221ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
132349b5e25fSSatish Balay   PetscFunctionReturn(0);
132449b5e25fSSatish Balay }
132549b5e25fSSatish Balay 
13264a2ae208SSatish Balay #undef __FUNCT__
13274a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1328dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
132949b5e25fSSatish Balay {
133049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
13315e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
133249b5e25fSSatish Balay   MatScalar      *aa,*v;
1333dfbe8321SBarry Smith   PetscErrorCode ierr;
13345e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1335b3bf805bSHong Zhang   PetscTruth     flg;
133649b5e25fSSatish Balay 
133749b5e25fSSatish Balay   PetscFunctionBegin;
1338b3bf805bSHong Zhang   if (ll != rr){
1339b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1340b3bf805bSHong Zhang     if (!flg)
1341b3bf805bSHong Zhang       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1342b3bf805bSHong Zhang   }
1343b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
134449b5e25fSSatish Balay   ai  = a->i;
134549b5e25fSSatish Balay   aj  = a->j;
134649b5e25fSSatish Balay   aa  = a->a;
1347d0f46423SBarry Smith   m   = A->rmap->N;
1348d0f46423SBarry Smith   bs  = A->rmap->bs;
134949b5e25fSSatish Balay   mbs = a->mbs;
135049b5e25fSSatish Balay   bs2 = a->bs2;
135149b5e25fSSatish Balay 
13521ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
135349b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1354347d480fSBarry Smith   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
135549b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
135649b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
135749b5e25fSSatish Balay     li = l + i*bs;
135849b5e25fSSatish Balay     v  = aa + bs2*ai[i];
135949b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
136049b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13615e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
136249b5e25fSSatish Balay         x = ri[k];
136349b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
136449b5e25fSSatish Balay       }
136549b5e25fSSatish Balay     }
136649b5e25fSSatish Balay   }
13671ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1368*dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
136949b5e25fSSatish Balay   PetscFunctionReturn(0);
137049b5e25fSSatish Balay }
137149b5e25fSSatish Balay 
13724a2ae208SSatish Balay #undef __FUNCT__
13734a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1374dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
137549b5e25fSSatish Balay {
137649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137749b5e25fSSatish Balay 
137849b5e25fSSatish Balay   PetscFunctionBegin;
137949b5e25fSSatish Balay   info->block_size     = a->bs2;
13806c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13816c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
138249b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
138349b5e25fSSatish Balay   info->assemblies   = A->num_ass;
138449b5e25fSSatish Balay   info->mallocs      = a->reallocs;
13857adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
138649b5e25fSSatish Balay   if (A->factor) {
138749b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
138849b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
138949b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
139049b5e25fSSatish Balay   } else {
139149b5e25fSSatish Balay     info->fill_ratio_given  = 0;
139249b5e25fSSatish Balay     info->fill_ratio_needed = 0;
139349b5e25fSSatish Balay     info->factor_mallocs    = 0;
139449b5e25fSSatish Balay   }
139549b5e25fSSatish Balay   PetscFunctionReturn(0);
139649b5e25fSSatish Balay }
139749b5e25fSSatish Balay 
139849b5e25fSSatish Balay 
13994a2ae208SSatish Balay #undef __FUNCT__
14004a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1401dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
140249b5e25fSSatish Balay {
140349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1404dfbe8321SBarry Smith   PetscErrorCode ierr;
140549b5e25fSSatish Balay 
140649b5e25fSSatish Balay   PetscFunctionBegin;
140749b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
140849b5e25fSSatish Balay   PetscFunctionReturn(0);
140949b5e25fSSatish Balay }
1410dc354874SHong Zhang 
14114a2ae208SSatish Balay #undef __FUNCT__
1412985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1413985db425SBarry Smith /*
1414985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1415985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1416985db425SBarry Smith */
1417985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1418dc354874SHong Zhang {
1419dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1420dfbe8321SBarry Smith   PetscErrorCode ierr;
142113f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1422c3fca9a7SHong Zhang   PetscReal      atmp;
1423273d9f13SBarry Smith   MatScalar      *aa;
1424985db425SBarry Smith   PetscScalar    *x;
142513f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1426dc354874SHong Zhang 
1427dc354874SHong Zhang   PetscFunctionBegin;
1428985db425SBarry Smith   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1429dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1430d0f46423SBarry Smith   bs   = A->rmap->bs;
1431dc354874SHong Zhang   aa   = a->a;
1432dc354874SHong Zhang   ai   = a->i;
1433dc354874SHong Zhang   aj   = a->j;
143444117c81SHong Zhang   mbs = a->mbs;
1435dc354874SHong Zhang 
1436985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14371ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1438dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1439d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
144044117c81SHong Zhang   for (i=0; i<mbs; i++) {
1441d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1442d0f6400bSHong Zhang     brow  = bs*i;
144344117c81SHong Zhang     for (j=0; j<ncols; j++){
1444d0f6400bSHong Zhang       bcol = bs*(*aj);
144544117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1446d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
144744117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1448d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1449d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1450c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1451c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
145244117c81SHong Zhang         }
145344117c81SHong Zhang       }
1454d0f6400bSHong Zhang       aj++;
1455dc354874SHong Zhang     }
1456dc354874SHong Zhang   }
14571ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1458dc354874SHong Zhang   PetscFunctionReturn(0);
1459dc354874SHong Zhang }
1460