xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 061b26675e9e52d40004d117d091571c1aa6a536)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
47c4f633dSBarry Smith #include "../src/inline/spops.h"
57c4f633dSBarry Smith #include "../src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
77c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
155d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
165d0c19d7SBarry Smith   const PetscInt *idx;
17521d7252SBarry Smith   PetscBT        table,table0;
18d94109b8SHong Zhang 
19d94109b8SHong Zhang   PetscFunctionBegin;
20b3bf805bSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
21d94109b8SHong Zhang   mbs = a->mbs;
22d94109b8SHong Zhang   ai  = a->i;
23d94109b8SHong Zhang   aj  = a->j;
24d0f46423SBarry Smith   bs  = A->rmap->bs;
25d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2613f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
27d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
28d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
29d94109b8SHong Zhang 
30d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
31d94109b8SHong Zhang     isz  = 0;
32d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
33d94109b8SHong Zhang 
34d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
35d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37d94109b8SHong Zhang 
38d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
39dbe03f88SHong Zhang     bcol_max = 0;
40d94109b8SHong Zhang     for (j=0; j<n ; ++j){
41d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
42d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
44dbe03f88SHong Zhang         nidx[isz++] = brow;
45dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
46dbe03f88SHong Zhang       }
47d94109b8SHong Zhang     }
48d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
49d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
50d94109b8SHong Zhang 
51d94109b8SHong Zhang     k = 0;
52d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
53d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
54d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
55efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
56d94109b8SHong Zhang 
57d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
58d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
59d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
60d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
61d94109b8SHong Zhang           for (l = start; l<end ; l++){
62d94109b8SHong Zhang             bcol = aj[l];
63d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
64d94109b8SHong Zhang           }
65d94109b8SHong Zhang           k++;
66d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
67d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
68d94109b8SHong Zhang           for (l = start; l<end ; l++){
69d94109b8SHong Zhang             bcol = aj[l];
70dbe03f88SHong Zhang             if (bcol > bcol_max) break;
71d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
72d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
73d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
74d94109b8SHong Zhang             }
75d94109b8SHong Zhang           }
76d94109b8SHong Zhang         }
77d94109b8SHong Zhang       }
78d94109b8SHong Zhang     } /* for each overlap */
79d94109b8SHong Zhang 
80d94109b8SHong Zhang     /* expand the Index Set */
81d94109b8SHong Zhang     for (j=0; j<isz; j++) {
82d94109b8SHong Zhang       for (k=0; k<bs; k++)
83d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
84d94109b8SHong Zhang     }
85d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
86d94109b8SHong Zhang   }
87d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
915eee224dSHong Zhang   PetscFunctionReturn(0);
9249b5e25fSSatish Balay }
9349b5e25fSSatish Balay 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
964aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9749b5e25fSSatish Balay {
9849b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
996849ba73SBarry Smith   PetscErrorCode ierr;
10013f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10113f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1025d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1035d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10449b5e25fSSatish Balay   MatScalar      *mat_a;
10549b5e25fSSatish Balay   Mat            C;
10614ca34e6SBarry Smith   PetscTruth     flag,sorted;
10749b5e25fSSatish Balay 
10849b5e25fSSatish Balay   PetscFunctionBegin;
109e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
11014ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
11114ca34e6SBarry Smith   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11249b5e25fSSatish Balay 
11349b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11449b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11549b5e25fSSatish Balay 
11613f74950SBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11749b5e25fSSatish Balay   ssmap = smap;
11813f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11913f74950SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
12049b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12149b5e25fSSatish Balay   /* determine lens of each row */
12249b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12349b5e25fSSatish Balay     kstart  = ai[irow[i]];
12449b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12549b5e25fSSatish Balay     lens[i] = 0;
12649b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12749b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12849b5e25fSSatish Balay           lens[i]++;
12949b5e25fSSatish Balay         }
13049b5e25fSSatish Balay       }
13149b5e25fSSatish Balay     }
13249b5e25fSSatish Balay   /* Create and fill new matrix */
13349b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13449b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13549b5e25fSSatish Balay 
136d0f46423SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13713f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
138abc0a331SBarry Smith     if (!flag) {
139347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
14049b5e25fSSatish Balay     }
14113f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14249b5e25fSSatish Balay     C = *B;
14349b5e25fSSatish Balay   } else {
1447adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
145f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1467adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
147ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14849b5e25fSSatish Balay   }
14949b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
15049b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15149b5e25fSSatish Balay     row    = irow[i];
15249b5e25fSSatish Balay     kstart = ai[row];
15349b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15449b5e25fSSatish Balay     mat_i  = c->i[i];
15549b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15649b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15749b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15849b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15949b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
16049b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16149b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16249b5e25fSSatish Balay         mat_a   += bs2;
16349b5e25fSSatish Balay         (*mat_ilen)++;
16449b5e25fSSatish Balay       }
16549b5e25fSSatish Balay     }
16649b5e25fSSatish Balay   }
16749b5e25fSSatish Balay 
16849b5e25fSSatish Balay   /* Free work space */
16949b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17349b5e25fSSatish Balay 
17449b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17549b5e25fSSatish Balay   *B = C;
17649b5e25fSSatish Balay   PetscFunctionReturn(0);
17749b5e25fSSatish Balay }
17849b5e25fSSatish Balay 
1794a2ae208SSatish Balay #undef __FUNCT__
1804a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1814aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
18249b5e25fSSatish Balay {
18349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18449b5e25fSSatish Balay   IS             is1;
1856849ba73SBarry Smith   PetscErrorCode ierr;
1865d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1875d0c19d7SBarry Smith   const PetscInt *irow;
18849b5e25fSSatish Balay 
18949b5e25fSSatish Balay   PetscFunctionBegin;
190e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
19149b5e25fSSatish Balay 
19249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19449b5e25fSSatish Balay 
19549b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19649b5e25fSSatish Balay    and form the IS with compressed IS */
19713f74950SBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
19849b5e25fSSatish Balay   iary = vary + a->mbs;
19913f74950SBarry Smith   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
20049b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
20149b5e25fSSatish Balay 
20249b5e25fSSatish Balay   count = 0;
20349b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
204e005ede5SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20549b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20649b5e25fSSatish Balay   }
20749b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20849b5e25fSSatish Balay 
20949b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
21049b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
21149b5e25fSSatish Balay 
2124aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
21349b5e25fSSatish Balay   ISDestroy(is1);
21449b5e25fSSatish Balay   PetscFunctionReturn(0);
21549b5e25fSSatish Balay }
21649b5e25fSSatish Balay 
2174a2ae208SSatish Balay #undef __FUNCT__
2184a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21913f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
22049b5e25fSSatish Balay {
2216849ba73SBarry Smith   PetscErrorCode ierr;
22213f74950SBarry Smith   PetscInt       i;
22349b5e25fSSatish Balay 
22449b5e25fSSatish Balay   PetscFunctionBegin;
22549b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22682502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22749b5e25fSSatish Balay   }
22849b5e25fSSatish Balay 
22949b5e25fSSatish Balay   for (i=0; i<n; i++) {
2304aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
23149b5e25fSSatish Balay   }
23249b5e25fSSatish Balay   PetscFunctionReturn(0);
23349b5e25fSSatish Balay }
23449b5e25fSSatish Balay 
23549b5e25fSSatish Balay /* -------------------------------------------------------*/
23649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23749b5e25fSSatish Balay /* -------------------------------------------------------*/
238d9eff348SSatish Balay #include "petscblaslapack.h"
23949b5e25fSSatish Balay 
2404a2ae208SSatish Balay #undef __FUNCT__
2414a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
242dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
24349b5e25fSSatish Balay {
24449b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
245*061b2667SBarry Smith   const PetscScalar *x;
246*061b2667SBarry Smith   PetscScalar       *z,x1,sum;
247*061b2667SBarry Smith   const MatScalar   *v;
2486849ba73SBarry Smith   PetscErrorCode    ierr;
249*061b2667SBarry Smith   PetscInt          mbs=a->mbs,i,j,nz;
250*061b2667SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
25149b5e25fSSatish Balay 
25249b5e25fSSatish Balay   PetscFunctionBegin;
253*061b2667SBarry Smith   ierr = VecSet(zz,0.0);CHKERRQ(ierr);
254*061b2667SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2551ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25649b5e25fSSatish Balay 
25749b5e25fSSatish Balay   v  = a->a;
258*061b2667SBarry Smith   ib = aj;
25949b5e25fSSatish Balay 
26049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
261*061b2667SBarry Smith     nz   = ai[i+1] - ai[i];        /* length of i_th row of A */
262*061b2667SBarry Smith     x1   = x[i];
263*061b2667SBarry Smith     sum  = v[0]*x1;                /* diagonal term */
264*061b2667SBarry Smith     for (j=1; j<nz; j++) {
265*061b2667SBarry Smith       z[ib[j]] += v[j] * x1;       /* (strict lower triangular part of A)*x  */
266*061b2667SBarry Smith       sum      += v[j] * x[ib[j]]; /* (strict upper triangular part of A)*x  */
267831a3094SHong Zhang     }
268*061b2667SBarry Smith     z[i] += sum;
269*061b2667SBarry Smith     v    += nz;
270*061b2667SBarry Smith     ib   += nz;
27149b5e25fSSatish Balay   }
27249b5e25fSSatish Balay 
273*061b2667SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2741ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
275*061b2667SBarry Smith   ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr);
27649b5e25fSSatish Balay   PetscFunctionReturn(0);
27749b5e25fSSatish Balay }
27849b5e25fSSatish Balay 
2794a2ae208SSatish Balay #undef __FUNCT__
2804a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
281dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
28249b5e25fSSatish Balay {
28349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
28487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
28549b5e25fSSatish Balay   MatScalar      *v;
2866849ba73SBarry Smith   PetscErrorCode ierr;
28713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
28898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
28949b5e25fSSatish Balay 
29049b5e25fSSatish Balay   PetscFunctionBegin;
2912dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
29449b5e25fSSatish Balay 
29549b5e25fSSatish Balay   v     = a->a;
29649b5e25fSSatish Balay   xb = x;
29749b5e25fSSatish Balay 
29849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
29949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
30149b5e25fSSatish Balay     ib = aj + *ai;
302831a3094SHong Zhang     jmin = 0;
30398c9bda7SSatish Balay     nonzerorow += (n>0);
3047fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
30549b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
30649b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
307831a3094SHong Zhang       v += 4; jmin++;
3087fbae186SHong Zhang     }
309831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31149b5e25fSSatish Balay       cval       = ib[j]*2;
31249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
31349b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
31449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
31549b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
31649b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
31749b5e25fSSatish Balay       v  += 4;
31849b5e25fSSatish Balay     }
31949b5e25fSSatish Balay     xb +=2; ai++;
32049b5e25fSSatish Balay   }
32149b5e25fSSatish Balay 
3221ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3231ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
324dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
32549b5e25fSSatish Balay   PetscFunctionReturn(0);
32649b5e25fSSatish Balay }
32749b5e25fSSatish Balay 
3284a2ae208SSatish Balay #undef __FUNCT__
3294a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
330dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
33149b5e25fSSatish Balay {
33249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
33387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
33449b5e25fSSatish Balay   MatScalar      *v;
3356849ba73SBarry Smith   PetscErrorCode ierr;
33613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
33798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
33849b5e25fSSatish Balay 
33949b5e25fSSatish Balay   PetscFunctionBegin;
3402dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3411ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3421ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34349b5e25fSSatish Balay 
34449b5e25fSSatish Balay   v    = a->a;
34549b5e25fSSatish Balay   xb   = x;
34649b5e25fSSatish Balay 
34749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
34849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
34949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
35049b5e25fSSatish Balay     ib = aj + *ai;
351831a3094SHong Zhang     jmin = 0;
35298c9bda7SSatish Balay     nonzerorow += (n>0);
3537fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
35449b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
35549b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
35649b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
357831a3094SHong Zhang       v += 9; jmin++;
3587fbae186SHong Zhang     }
359831a3094SHong Zhang     for (j=jmin; j<n; j++) {
36049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
36149b5e25fSSatish Balay       cval       = ib[j]*3;
36249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
36349b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
36449b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
36549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
36649b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
36749b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
36849b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
36949b5e25fSSatish Balay       v  += 9;
37049b5e25fSSatish Balay     }
37149b5e25fSSatish Balay     xb +=3; ai++;
37249b5e25fSSatish Balay   }
37349b5e25fSSatish Balay 
3741ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3751ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
376dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
37749b5e25fSSatish Balay   PetscFunctionReturn(0);
37849b5e25fSSatish Balay }
37949b5e25fSSatish Balay 
3804a2ae208SSatish Balay #undef __FUNCT__
3814a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
382dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
38349b5e25fSSatish Balay {
38449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
38649b5e25fSSatish Balay   MatScalar      *v;
3876849ba73SBarry Smith   PetscErrorCode ierr;
38813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
38998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
39049b5e25fSSatish Balay 
39149b5e25fSSatish Balay   PetscFunctionBegin;
3922dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3931ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3941ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
39549b5e25fSSatish Balay 
39649b5e25fSSatish Balay   v     = a->a;
39749b5e25fSSatish Balay   xb = x;
39849b5e25fSSatish Balay 
39949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
40049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
40149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
40249b5e25fSSatish Balay     ib = aj + *ai;
403831a3094SHong Zhang     jmin = 0;
40498c9bda7SSatish Balay     nonzerorow += (n>0);
4057fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
40649b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
40749b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
40849b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
40949b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
410831a3094SHong Zhang       v += 16; jmin++;
4117fbae186SHong Zhang     }
412831a3094SHong Zhang     for (j=jmin; j<n; j++) {
41349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
41449b5e25fSSatish Balay       cval       = ib[j]*4;
41549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
41649b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
41749b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
41849b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
41949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
42049b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
42149b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
42249b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
42349b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
42449b5e25fSSatish Balay       v  += 16;
42549b5e25fSSatish Balay     }
42649b5e25fSSatish Balay     xb +=4; ai++;
42749b5e25fSSatish Balay   }
42849b5e25fSSatish Balay 
4291ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4301ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
431dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
43249b5e25fSSatish Balay   PetscFunctionReturn(0);
43349b5e25fSSatish Balay }
43449b5e25fSSatish Balay 
4354a2ae208SSatish Balay #undef __FUNCT__
4364a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
437dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
43849b5e25fSSatish Balay {
43949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
44087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
44149b5e25fSSatish Balay   MatScalar      *v;
4426849ba73SBarry Smith   PetscErrorCode ierr;
44313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
44498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
44549b5e25fSSatish Balay 
44649b5e25fSSatish Balay   PetscFunctionBegin;
4472dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4481ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4491ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
45049b5e25fSSatish Balay 
45149b5e25fSSatish Balay   v     = a->a;
45249b5e25fSSatish Balay   xb = x;
45349b5e25fSSatish Balay 
45449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
45549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
45649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
45749b5e25fSSatish Balay     ib = aj + *ai;
458831a3094SHong Zhang     jmin = 0;
45998c9bda7SSatish Balay     nonzerorow += (n>0);
4607fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
46149b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
46249b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
46349b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
46449b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
46549b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
466831a3094SHong Zhang       v += 25; jmin++;
4677fbae186SHong Zhang     }
468831a3094SHong Zhang     for (j=jmin; j<n; j++) {
46949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
47049b5e25fSSatish Balay       cval       = ib[j]*5;
47149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
47249b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
47349b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
47449b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
47549b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
47649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
47749b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
47849b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
47949b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
48049b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
48149b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
48249b5e25fSSatish Balay       v  += 25;
48349b5e25fSSatish Balay     }
48449b5e25fSSatish Balay     xb +=5; ai++;
48549b5e25fSSatish Balay   }
48649b5e25fSSatish Balay 
4871ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4881ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
489dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
49049b5e25fSSatish Balay   PetscFunctionReturn(0);
49149b5e25fSSatish Balay }
49249b5e25fSSatish Balay 
49349b5e25fSSatish Balay 
4944a2ae208SSatish Balay #undef __FUNCT__
4954a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
496dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
49749b5e25fSSatish Balay {
49849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
49987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
50049b5e25fSSatish Balay   MatScalar      *v;
5016849ba73SBarry Smith   PetscErrorCode ierr;
50213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
50398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
50449b5e25fSSatish Balay 
50549b5e25fSSatish Balay   PetscFunctionBegin;
5062dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5071ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5081ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
50949b5e25fSSatish Balay 
51049b5e25fSSatish Balay   v     = a->a;
51149b5e25fSSatish Balay   xb = x;
51249b5e25fSSatish Balay 
51349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
51449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
51549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
51649b5e25fSSatish Balay     ib = aj + *ai;
517831a3094SHong Zhang     jmin = 0;
51898c9bda7SSatish Balay     nonzerorow += (n>0);
5197fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
52049b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
52149b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
52249b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
52349b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
52449b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
52549b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
526831a3094SHong Zhang       v += 36; jmin++;
5277fbae186SHong Zhang     }
528831a3094SHong Zhang     for (j=jmin; j<n; j++) {
52949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
53049b5e25fSSatish Balay       cval       = ib[j]*6;
53149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
53249b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
53349b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
53449b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
53549b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
53649b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
53749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
53849b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
53949b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
54049b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
54149b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
54249b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
54349b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
54449b5e25fSSatish Balay       v  += 36;
54549b5e25fSSatish Balay     }
54649b5e25fSSatish Balay     xb +=6; ai++;
54749b5e25fSSatish Balay   }
54849b5e25fSSatish Balay 
5491ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5501ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
551dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
55249b5e25fSSatish Balay   PetscFunctionReturn(0);
55349b5e25fSSatish Balay }
5544a2ae208SSatish Balay #undef __FUNCT__
5554a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
556dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
55749b5e25fSSatish Balay {
55849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
55987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
56049b5e25fSSatish Balay   MatScalar      *v;
5616849ba73SBarry Smith   PetscErrorCode ierr;
56213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
56398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
56449b5e25fSSatish Balay 
56549b5e25fSSatish Balay   PetscFunctionBegin;
5662dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5671ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5681ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
56949b5e25fSSatish Balay 
57049b5e25fSSatish Balay   v     = a->a;
57149b5e25fSSatish Balay   xb = x;
57249b5e25fSSatish Balay 
57349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
57449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
57549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
57649b5e25fSSatish Balay     ib = aj + *ai;
577831a3094SHong Zhang     jmin = 0;
57898c9bda7SSatish Balay     nonzerorow += (n>0);
5797fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
58049b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
58149b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
58249b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
58349b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
58449b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
58549b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
58649b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
587831a3094SHong Zhang       v += 49; jmin++;
5887fbae186SHong Zhang     }
589831a3094SHong Zhang     for (j=jmin; j<n; j++) {
59049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
59149b5e25fSSatish Balay       cval       = ib[j]*7;
59249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
59349b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
59449b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
59549b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
59649b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
59749b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
59849b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
59949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
60049b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
60149b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
60249b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
60349b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
60449b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
60549b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
60649b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
60749b5e25fSSatish Balay       v  += 49;
60849b5e25fSSatish Balay     }
60949b5e25fSSatish Balay     xb +=7; ai++;
61049b5e25fSSatish Balay   }
6111ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6121ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
613dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
61449b5e25fSSatish Balay   PetscFunctionReturn(0);
61549b5e25fSSatish Balay }
61649b5e25fSSatish Balay 
61749b5e25fSSatish Balay /*
61849b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
61949b5e25fSSatish Balay */
6204a2ae208SSatish Balay #undef __FUNCT__
6214a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
622dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
62349b5e25fSSatish Balay {
62449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
62587828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6260b60a74dSHong Zhang   MatScalar      *v;
627dfbe8321SBarry Smith   PetscErrorCode ierr;
628d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
62998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
63049b5e25fSSatish Balay 
63149b5e25fSSatish Balay   PetscFunctionBegin;
6322dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6331ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
63549b5e25fSSatish Balay 
63649b5e25fSSatish Balay   aj   = a->j;
63749b5e25fSSatish Balay   v    = a->a;
63849b5e25fSSatish Balay   ii   = a->i;
63949b5e25fSSatish Balay 
64049b5e25fSSatish Balay   if (!a->mult_work) {
641d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
64249b5e25fSSatish Balay   }
64349b5e25fSSatish Balay   work = a->mult_work;
64449b5e25fSSatish Balay 
64549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
64649b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
64749b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
64898c9bda7SSatish Balay     nonzerorow += (n>0);
64949b5e25fSSatish Balay 
65049b5e25fSSatish Balay     /* upper triangular part */
65149b5e25fSSatish Balay     for (j=0; j<n; j++) {
65249b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
65349b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
65449b5e25fSSatish Balay       workt += bs;
65549b5e25fSSatish Balay     }
65649b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
65749b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
65849b5e25fSSatish Balay 
65949b5e25fSSatish Balay     /* strict lower triangular part */
660831a3094SHong Zhang     idx = aj+ii[0];
661831a3094SHong Zhang     if (*idx == i){
66296b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
663831a3094SHong Zhang     }
66496b9376eSHong Zhang 
66549b5e25fSSatish Balay     if (ncols > 0){
66649b5e25fSSatish Balay       workt = work;
66787828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
668831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
669831a3094SHong Zhang       for (j=0; j<n; j++) {
670831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
67149b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
67249b5e25fSSatish Balay         workt += bs;
67349b5e25fSSatish Balay       }
67449b5e25fSSatish Balay     }
67549b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
67649b5e25fSSatish Balay   }
67749b5e25fSSatish Balay 
6781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
680dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
68149b5e25fSSatish Balay   PetscFunctionReturn(0);
68249b5e25fSSatish Balay }
68349b5e25fSSatish Balay 
6846a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6854a2ae208SSatish Balay #undef __FUNCT__
6864a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
687dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
68849b5e25fSSatish Balay {
68949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
690bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
69149b5e25fSSatish Balay   MatScalar      *v;
6926849ba73SBarry Smith   PetscErrorCode ierr;
69313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
69498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
69549b5e25fSSatish Balay 
69649b5e25fSSatish Balay   PetscFunctionBegin;
6976a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70049b5e25fSSatish Balay   v  = a->a;
70149b5e25fSSatish Balay   xb = x;
70249b5e25fSSatish Balay 
70349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70449b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
70549b5e25fSSatish Balay     x1 = xb[0];
70649b5e25fSSatish Balay     ib = aj + *ai;
707831a3094SHong Zhang     jmin = 0;
70898c9bda7SSatish Balay     nonzerorow += (n>0);
709831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
710831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
711831a3094SHong Zhang     }
712831a3094SHong Zhang     for (j=jmin; j<n; j++) {
71349b5e25fSSatish Balay       cval    = *ib;
71449b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
71549b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
71649b5e25fSSatish Balay     }
71749b5e25fSSatish Balay     xb++; ai++;
71849b5e25fSSatish Balay   }
71949b5e25fSSatish Balay 
7201ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
721bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
72249b5e25fSSatish Balay 
723dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
72449b5e25fSSatish Balay   PetscFunctionReturn(0);
72549b5e25fSSatish Balay }
72649b5e25fSSatish Balay 
7274a2ae208SSatish Balay #undef __FUNCT__
7284a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
729dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
73049b5e25fSSatish Balay {
73149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
732bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
73349b5e25fSSatish Balay   MatScalar      *v;
7346849ba73SBarry Smith   PetscErrorCode ierr;
73513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
73698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
73749b5e25fSSatish Balay 
73849b5e25fSSatish Balay   PetscFunctionBegin;
7396a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7401ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7411ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
74249b5e25fSSatish Balay 
74349b5e25fSSatish Balay   v  = a->a;
74449b5e25fSSatish Balay   xb = x;
74549b5e25fSSatish Balay 
74649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
74749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
74849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
74949b5e25fSSatish Balay     ib = aj + *ai;
750831a3094SHong Zhang     jmin = 0;
75198c9bda7SSatish Balay     nonzerorow += (n>0);
7527fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
75349b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
75449b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
755831a3094SHong Zhang       v += 4; jmin++;
7567fbae186SHong Zhang     }
757831a3094SHong Zhang     for (j=jmin; j<n; j++) {
75849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
75949b5e25fSSatish Balay       cval       = ib[j]*2;
76049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
76149b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
76249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
76349b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
76449b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
76549b5e25fSSatish Balay       v  += 4;
76649b5e25fSSatish Balay     }
76749b5e25fSSatish Balay     xb +=2; ai++;
76849b5e25fSSatish Balay   }
7691ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
770bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
77149b5e25fSSatish Balay 
772dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
77349b5e25fSSatish Balay   PetscFunctionReturn(0);
77449b5e25fSSatish Balay }
77549b5e25fSSatish Balay 
7764a2ae208SSatish Balay #undef __FUNCT__
7774a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
778dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
77949b5e25fSSatish Balay {
78049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
781bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
78249b5e25fSSatish Balay   MatScalar      *v;
7836849ba73SBarry Smith   PetscErrorCode ierr;
78413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
78598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
78649b5e25fSSatish Balay 
78749b5e25fSSatish Balay   PetscFunctionBegin;
7886a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7891ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
79149b5e25fSSatish Balay 
79249b5e25fSSatish Balay   v     = a->a;
79349b5e25fSSatish Balay   xb = x;
79449b5e25fSSatish Balay 
79549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
79649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
79749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
79849b5e25fSSatish Balay     ib = aj + *ai;
799831a3094SHong Zhang     jmin = 0;
80098c9bda7SSatish Balay     nonzerorow += (n>0);
8017fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
80249b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
80349b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
80449b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
805831a3094SHong Zhang      v += 9; jmin++;
8067fbae186SHong Zhang     }
807831a3094SHong Zhang     for (j=jmin; j<n; j++) {
80849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
80949b5e25fSSatish Balay       cval       = ib[j]*3;
81049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
81149b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
81249b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
81349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
81449b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
81549b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
81649b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
81749b5e25fSSatish Balay       v  += 9;
81849b5e25fSSatish Balay     }
81949b5e25fSSatish Balay     xb +=3; ai++;
82049b5e25fSSatish Balay   }
82149b5e25fSSatish Balay 
8221ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
823bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
82449b5e25fSSatish Balay 
825dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
82649b5e25fSSatish Balay   PetscFunctionReturn(0);
82749b5e25fSSatish Balay }
82849b5e25fSSatish Balay 
8294a2ae208SSatish Balay #undef __FUNCT__
8304a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
831dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
83249b5e25fSSatish Balay {
83349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
834bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
83549b5e25fSSatish Balay   MatScalar      *v;
8366849ba73SBarry Smith   PetscErrorCode ierr;
83713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
83898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
83949b5e25fSSatish Balay 
84049b5e25fSSatish Balay   PetscFunctionBegin;
8416a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8421ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
84449b5e25fSSatish Balay 
84549b5e25fSSatish Balay   v     = a->a;
84649b5e25fSSatish Balay   xb = x;
84749b5e25fSSatish Balay 
84849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
84949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
85049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
85149b5e25fSSatish Balay     ib = aj + *ai;
852831a3094SHong Zhang     jmin = 0;
85398c9bda7SSatish Balay     nonzerorow += (n>0);
8547fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
85549b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
85649b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
85749b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
85849b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
859831a3094SHong Zhang       v += 16; jmin++;
8607fbae186SHong Zhang     }
861831a3094SHong Zhang     for (j=jmin; j<n; j++) {
86249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
86349b5e25fSSatish Balay       cval       = ib[j]*4;
86449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
86549b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
86649b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
86749b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
86849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
86949b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
87049b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
87149b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
87249b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
87349b5e25fSSatish Balay       v  += 16;
87449b5e25fSSatish Balay     }
87549b5e25fSSatish Balay     xb +=4; ai++;
87649b5e25fSSatish Balay   }
87749b5e25fSSatish Balay 
8781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
88049b5e25fSSatish Balay 
881dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
88249b5e25fSSatish Balay   PetscFunctionReturn(0);
88349b5e25fSSatish Balay }
88449b5e25fSSatish Balay 
8854a2ae208SSatish Balay #undef __FUNCT__
8864a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
887dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
88849b5e25fSSatish Balay {
88949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
890bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
89149b5e25fSSatish Balay   MatScalar      *v;
8926849ba73SBarry Smith   PetscErrorCode ierr;
89313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
89498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
89549b5e25fSSatish Balay 
89649b5e25fSSatish Balay   PetscFunctionBegin;
8976a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
90049b5e25fSSatish Balay 
90149b5e25fSSatish Balay   v     = a->a;
90249b5e25fSSatish Balay   xb = x;
90349b5e25fSSatish Balay 
90449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
90549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
90649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
90749b5e25fSSatish Balay     ib = aj + *ai;
908831a3094SHong Zhang     jmin = 0;
90998c9bda7SSatish Balay     nonzerorow += (n>0);
9107fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
91149b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
91249b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
91349b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
91449b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
91549b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
916831a3094SHong Zhang       v += 25; jmin++;
9177fbae186SHong Zhang     }
918831a3094SHong Zhang     for (j=jmin; j<n; j++) {
91949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
92049b5e25fSSatish Balay       cval       = ib[j]*5;
92149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
92249b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
92349b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
92449b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
92549b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
92649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
92749b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
92849b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
92949b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
93049b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
93149b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
93249b5e25fSSatish Balay       v  += 25;
93349b5e25fSSatish Balay     }
93449b5e25fSSatish Balay     xb +=5; ai++;
93549b5e25fSSatish Balay   }
93649b5e25fSSatish Balay 
9371ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
938bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
93949b5e25fSSatish Balay 
940dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
94149b5e25fSSatish Balay   PetscFunctionReturn(0);
94249b5e25fSSatish Balay }
9434a2ae208SSatish Balay #undef __FUNCT__
9444a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
945dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
94649b5e25fSSatish Balay {
94749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
948bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
94949b5e25fSSatish Balay   MatScalar      *v;
9506849ba73SBarry Smith   PetscErrorCode ierr;
95113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
95298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
95349b5e25fSSatish Balay 
95449b5e25fSSatish Balay   PetscFunctionBegin;
9556a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9561ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9571ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
95849b5e25fSSatish Balay 
95949b5e25fSSatish Balay   v     = a->a;
96049b5e25fSSatish Balay   xb = x;
96149b5e25fSSatish Balay 
96249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
96349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
96449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
96549b5e25fSSatish Balay     ib = aj + *ai;
966831a3094SHong Zhang     jmin = 0;
96798c9bda7SSatish Balay     nonzerorow += (n>0);
9687fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
96949b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
97049b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
97149b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
97249b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
97349b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
97449b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
975831a3094SHong Zhang       v += 36; jmin++;
9767fbae186SHong Zhang     }
977831a3094SHong Zhang     for (j=jmin; j<n; j++) {
97849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
97949b5e25fSSatish Balay       cval       = ib[j]*6;
98049b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
98149b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
98249b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
98349b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
98449b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
98549b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
98649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
98749b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
98849b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
98949b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
99049b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
99149b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
99249b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
99349b5e25fSSatish Balay       v  += 36;
99449b5e25fSSatish Balay     }
99549b5e25fSSatish Balay     xb +=6; ai++;
99649b5e25fSSatish Balay   }
99749b5e25fSSatish Balay 
9981ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
999bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
100049b5e25fSSatish Balay 
1001dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
100249b5e25fSSatish Balay   PetscFunctionReturn(0);
100349b5e25fSSatish Balay }
100449b5e25fSSatish Balay 
10054a2ae208SSatish Balay #undef __FUNCT__
10064a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
1007dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
100849b5e25fSSatish Balay {
100949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1010bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
101149b5e25fSSatish Balay   MatScalar      *v;
10126849ba73SBarry Smith   PetscErrorCode ierr;
101313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
101498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
101549b5e25fSSatish Balay 
101649b5e25fSSatish Balay   PetscFunctionBegin;
10176a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10181ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
10191ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
102049b5e25fSSatish Balay 
102149b5e25fSSatish Balay   v     = a->a;
102249b5e25fSSatish Balay   xb = x;
102349b5e25fSSatish Balay 
102449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
102549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
102649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
102749b5e25fSSatish Balay     ib = aj + *ai;
1028831a3094SHong Zhang     jmin = 0;
102998c9bda7SSatish Balay     nonzerorow += (n>0);
10307fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
103149b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
103249b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
103349b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
103449b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
103549b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
103649b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
103749b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1038831a3094SHong Zhang       v += 49; jmin++;
10397fbae186SHong Zhang     }
1040831a3094SHong Zhang     for (j=jmin; j<n; j++) {
104149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
104249b5e25fSSatish Balay       cval       = ib[j]*7;
104349b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
104449b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
104549b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
104649b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
104749b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
104849b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
104949b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
105049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
105149b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
105249b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
105349b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
105449b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
105549b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
105649b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
105749b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
105849b5e25fSSatish Balay       v  += 49;
105949b5e25fSSatish Balay     }
106049b5e25fSSatish Balay     xb +=7; ai++;
106149b5e25fSSatish Balay   }
106249b5e25fSSatish Balay 
10631ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1064bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
106549b5e25fSSatish Balay 
1066dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
106749b5e25fSSatish Balay   PetscFunctionReturn(0);
106849b5e25fSSatish Balay }
106949b5e25fSSatish Balay 
10704a2ae208SSatish Balay #undef __FUNCT__
10714a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1072dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
107349b5e25fSSatish Balay {
107449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1075bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1076066653e3SSatish Balay   MatScalar      *v;
1077dfbe8321SBarry Smith   PetscErrorCode ierr;
1078d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
107998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
108049b5e25fSSatish Balay 
108149b5e25fSSatish Balay   PetscFunctionBegin;
10826a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10831ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10841ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
108549b5e25fSSatish Balay 
108649b5e25fSSatish Balay   aj   = a->j;
108749b5e25fSSatish Balay   v    = a->a;
108849b5e25fSSatish Balay   ii   = a->i;
108949b5e25fSSatish Balay 
109049b5e25fSSatish Balay   if (!a->mult_work) {
1091d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
109249b5e25fSSatish Balay   }
109349b5e25fSSatish Balay   work = a->mult_work;
109449b5e25fSSatish Balay 
109549b5e25fSSatish Balay 
109649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
109749b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
109849b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
109998c9bda7SSatish Balay     nonzerorow += (n>0);
110049b5e25fSSatish Balay 
110149b5e25fSSatish Balay     /* upper triangular part */
110249b5e25fSSatish Balay     for (j=0; j<n; j++) {
110349b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
110449b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
110549b5e25fSSatish Balay       workt += bs;
110649b5e25fSSatish Balay     }
110749b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
110849b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
110949b5e25fSSatish Balay 
111049b5e25fSSatish Balay     /* strict lower triangular part */
1111831a3094SHong Zhang     idx = aj+ii[0];
1112831a3094SHong Zhang     if (*idx == i){
111396b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1114831a3094SHong Zhang     }
111549b5e25fSSatish Balay     if (ncols > 0){
111649b5e25fSSatish Balay       workt = work;
111787828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1118831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1119831a3094SHong Zhang       for (j=0; j<n; j++) {
1120831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
112149b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
112249b5e25fSSatish Balay         workt += bs;
112349b5e25fSSatish Balay       }
112449b5e25fSSatish Balay     }
112549b5e25fSSatish Balay 
112649b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
112749b5e25fSSatish Balay   }
112849b5e25fSSatish Balay 
11291ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1130bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
113149b5e25fSSatish Balay 
1132dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
113349b5e25fSSatish Balay   PetscFunctionReturn(0);
113449b5e25fSSatish Balay }
113549b5e25fSSatish Balay 
11364a2ae208SSatish Balay #undef __FUNCT__
11374a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1138f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
113949b5e25fSSatish Balay {
114049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1141f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1142efee365bSSatish Balay   PetscErrorCode ierr;
11430805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
114449b5e25fSSatish Balay 
114549b5e25fSSatish Balay   PetscFunctionBegin;
1146f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1147efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
114849b5e25fSSatish Balay   PetscFunctionReturn(0);
114949b5e25fSSatish Balay }
115049b5e25fSSatish Balay 
11514a2ae208SSatish Balay #undef __FUNCT__
11524a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1153dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
115449b5e25fSSatish Balay {
115549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
115649b5e25fSSatish Balay   MatScalar      *v = a->a;
115749b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1158d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11596849ba73SBarry Smith   PetscErrorCode ierr;
116013f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
116149b5e25fSSatish Balay 
116249b5e25fSSatish Balay   PetscFunctionBegin;
116349b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
116449b5e25fSSatish Balay     for (k=0; k<mbs; k++){
116549b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1166831a3094SHong Zhang       col  = aj + jmin;
1167831a3094SHong Zhang       if (*col == k){         /* diagonal block */
116849b5e25fSSatish Balay         for (i=0; i<bs2; i++){
116949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
117049b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
117149b5e25fSSatish Balay #else
117249b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
117349b5e25fSSatish Balay #endif
117449b5e25fSSatish Balay         }
1175831a3094SHong Zhang         jmin++;
1176831a3094SHong Zhang       }
1177831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
117849b5e25fSSatish Balay         for (i=0; i<bs2; i++){
117949b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
118049b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
118149b5e25fSSatish Balay #else
118249b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
118349b5e25fSSatish Balay #endif
118449b5e25fSSatish Balay         }
118549b5e25fSSatish Balay       }
118649b5e25fSSatish Balay     }
118749b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11880b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
11890b8dc8d2SHong Zhang     ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr);
11900b8dc8d2SHong Zhang     jl   = il + mbs;
11910b8dc8d2SHong Zhang     sum  = (PetscReal*)(jl + mbs);
11920b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11930b8dc8d2SHong Zhang     il[0] = 0;
119449b5e25fSSatish Balay 
119549b5e25fSSatish Balay     *norm = 0.0;
119649b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
119749b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
119849b5e25fSSatish Balay       /*-- col sum --*/
119949b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1200831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1201831a3094SHong Zhang                   at step k */
120249b5e25fSSatish Balay       while (i<mbs){
120349b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
120449b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
120549b5e25fSSatish Balay         for (j=0; j<bs; j++){
120649b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
120749b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
120849b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
120949b5e25fSSatish Balay           }
121049b5e25fSSatish Balay         }
121149b5e25fSSatish Balay         /* update il, jl */
1212831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1213831a3094SHong Zhang         jmax = a->i[i+1];
121449b5e25fSSatish Balay         if (jmin < jmax){
121549b5e25fSSatish Balay           il[i] = jmin;
121649b5e25fSSatish Balay           j   = a->j[jmin];
121749b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
121849b5e25fSSatish Balay         }
121949b5e25fSSatish Balay         i = nexti;
122049b5e25fSSatish Balay       }
122149b5e25fSSatish Balay       /*-- row sum --*/
122249b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
122349b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
122449b5e25fSSatish Balay         for (j=0; j<bs; j++){
122549b5e25fSSatish Balay           v = a->a + i*bs2 + j;
122649b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
12270b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
122849b5e25fSSatish Balay           }
122949b5e25fSSatish Balay         }
123049b5e25fSSatish Balay       }
123149b5e25fSSatish Balay       /* add k_th block row to il, jl */
1232831a3094SHong Zhang       col = aj+jmin;
1233831a3094SHong Zhang       if (*col == k) jmin++;
123449b5e25fSSatish Balay       if (jmin < jmax){
123549b5e25fSSatish Balay         il[k] = jmin;
12360b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
123749b5e25fSSatish Balay       }
123849b5e25fSSatish Balay       for (j=0; j<bs; j++){
123949b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
124049b5e25fSSatish Balay       }
124149b5e25fSSatish Balay     }
124249b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
124349b5e25fSSatish Balay   } else {
1244347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
124549b5e25fSSatish Balay   }
124649b5e25fSSatish Balay   PetscFunctionReturn(0);
124749b5e25fSSatish Balay }
124849b5e25fSSatish Balay 
12494a2ae208SSatish Balay #undef __FUNCT__
12504a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1251dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
125249b5e25fSSatish Balay {
125349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1254dfbe8321SBarry Smith   PetscErrorCode ierr;
125549b5e25fSSatish Balay 
125649b5e25fSSatish Balay   PetscFunctionBegin;
125749b5e25fSSatish Balay 
125849b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1259d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1260ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1261ef511fbeSHong Zhang     PetscFunctionReturn(0);
126249b5e25fSSatish Balay   }
126349b5e25fSSatish Balay 
126449b5e25fSSatish Balay   /* if the a->i are the same */
126513f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1266abc0a331SBarry Smith   if (!*flg) {
126749b5e25fSSatish Balay     PetscFunctionReturn(0);
126849b5e25fSSatish Balay   }
126949b5e25fSSatish Balay 
127049b5e25fSSatish Balay   /* if a->j are the same */
127113f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1272abc0a331SBarry Smith   if (!*flg) {
127349b5e25fSSatish Balay     PetscFunctionReturn(0);
127449b5e25fSSatish Balay   }
127549b5e25fSSatish Balay   /* if a->a are the same */
1276d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1277935af2e7SHong Zhang   PetscFunctionReturn(0);
127849b5e25fSSatish Balay }
127949b5e25fSSatish Balay 
12804a2ae208SSatish Balay #undef __FUNCT__
12814a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1282dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
128349b5e25fSSatish Balay {
128449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1285dfbe8321SBarry Smith   PetscErrorCode ierr;
128613f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
128787828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
128849b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
128949b5e25fSSatish Balay 
129049b5e25fSSatish Balay   PetscFunctionBegin;
1291d0f46423SBarry Smith   bs   = A->rmap->bs;
129282799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
129382799104SHong Zhang 
129449b5e25fSSatish Balay   aa   = a->a;
129549b5e25fSSatish Balay   ai   = a->i;
129649b5e25fSSatish Balay   aj   = a->j;
129749b5e25fSSatish Balay   ambs = a->mbs;
129849b5e25fSSatish Balay   bs2  = a->bs2;
129949b5e25fSSatish Balay 
13002dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
13011ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
130249b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1303d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
130449b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
130549b5e25fSSatish Balay     j=ai[i];
130649b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
130749b5e25fSSatish Balay       row  = i*bs;
130849b5e25fSSatish Balay       aa_j = aa + j*bs2;
130982799104SHong Zhang       if (A->factor && bs==1){
131082799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
131182799104SHong Zhang       } else {
131249b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
131349b5e25fSSatish Balay       }
131449b5e25fSSatish Balay     }
131582799104SHong Zhang   }
131682799104SHong Zhang 
13171ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
131849b5e25fSSatish Balay   PetscFunctionReturn(0);
131949b5e25fSSatish Balay }
132049b5e25fSSatish Balay 
13214a2ae208SSatish Balay #undef __FUNCT__
13224a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1323dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
132449b5e25fSSatish Balay {
132549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
13265e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
132749b5e25fSSatish Balay   MatScalar      *aa,*v;
1328dfbe8321SBarry Smith   PetscErrorCode ierr;
13295e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1330b3bf805bSHong Zhang   PetscTruth     flg;
133149b5e25fSSatish Balay 
133249b5e25fSSatish Balay   PetscFunctionBegin;
1333b3bf805bSHong Zhang   if (ll != rr){
1334b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1335b3bf805bSHong Zhang     if (!flg)
1336b3bf805bSHong Zhang       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1337b3bf805bSHong Zhang   }
1338b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
133949b5e25fSSatish Balay   ai  = a->i;
134049b5e25fSSatish Balay   aj  = a->j;
134149b5e25fSSatish Balay   aa  = a->a;
1342d0f46423SBarry Smith   m   = A->rmap->N;
1343d0f46423SBarry Smith   bs  = A->rmap->bs;
134449b5e25fSSatish Balay   mbs = a->mbs;
134549b5e25fSSatish Balay   bs2 = a->bs2;
134649b5e25fSSatish Balay 
13471ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
134849b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1349347d480fSBarry Smith   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
135049b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
135149b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
135249b5e25fSSatish Balay     li = l + i*bs;
135349b5e25fSSatish Balay     v  = aa + bs2*ai[i];
135449b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
135549b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13565e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
135749b5e25fSSatish Balay         x = ri[k];
135849b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
135949b5e25fSSatish Balay       }
136049b5e25fSSatish Balay     }
136149b5e25fSSatish Balay   }
13621ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1363dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
136449b5e25fSSatish Balay   PetscFunctionReturn(0);
136549b5e25fSSatish Balay }
136649b5e25fSSatish Balay 
13674a2ae208SSatish Balay #undef __FUNCT__
13684a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1369dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
137049b5e25fSSatish Balay {
137149b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137249b5e25fSSatish Balay 
137349b5e25fSSatish Balay   PetscFunctionBegin;
137449b5e25fSSatish Balay   info->block_size     = a->bs2;
13756c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13766c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
137749b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
137849b5e25fSSatish Balay   info->assemblies   = A->num_ass;
137949b5e25fSSatish Balay   info->mallocs      = a->reallocs;
13807adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
138149b5e25fSSatish Balay   if (A->factor) {
138249b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
138349b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
138449b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
138549b5e25fSSatish Balay   } else {
138649b5e25fSSatish Balay     info->fill_ratio_given  = 0;
138749b5e25fSSatish Balay     info->fill_ratio_needed = 0;
138849b5e25fSSatish Balay     info->factor_mallocs    = 0;
138949b5e25fSSatish Balay   }
139049b5e25fSSatish Balay   PetscFunctionReturn(0);
139149b5e25fSSatish Balay }
139249b5e25fSSatish Balay 
139349b5e25fSSatish Balay 
13944a2ae208SSatish Balay #undef __FUNCT__
13954a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1396dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
139749b5e25fSSatish Balay {
139849b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1399dfbe8321SBarry Smith   PetscErrorCode ierr;
140049b5e25fSSatish Balay 
140149b5e25fSSatish Balay   PetscFunctionBegin;
140249b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
140349b5e25fSSatish Balay   PetscFunctionReturn(0);
140449b5e25fSSatish Balay }
1405dc354874SHong Zhang 
14064a2ae208SSatish Balay #undef __FUNCT__
1407985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1408985db425SBarry Smith /*
1409985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1410985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1411985db425SBarry Smith */
1412985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1413dc354874SHong Zhang {
1414dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1415dfbe8321SBarry Smith   PetscErrorCode ierr;
141613f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1417c3fca9a7SHong Zhang   PetscReal      atmp;
1418273d9f13SBarry Smith   MatScalar      *aa;
1419985db425SBarry Smith   PetscScalar    *x;
142013f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1421dc354874SHong Zhang 
1422dc354874SHong Zhang   PetscFunctionBegin;
1423985db425SBarry Smith   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1424dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1425d0f46423SBarry Smith   bs   = A->rmap->bs;
1426dc354874SHong Zhang   aa   = a->a;
1427dc354874SHong Zhang   ai   = a->i;
1428dc354874SHong Zhang   aj   = a->j;
142944117c81SHong Zhang   mbs = a->mbs;
1430dc354874SHong Zhang 
1431985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14321ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1433dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1434d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
143544117c81SHong Zhang   for (i=0; i<mbs; i++) {
1436d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1437d0f6400bSHong Zhang     brow  = bs*i;
143844117c81SHong Zhang     for (j=0; j<ncols; j++){
1439d0f6400bSHong Zhang       bcol = bs*(*aj);
144044117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1441d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
144244117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1443d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1444d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1445c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1446c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
144744117c81SHong Zhang         }
144844117c81SHong Zhang       }
1449d0f6400bSHong Zhang       aj++;
1450dc354874SHong Zhang     }
1451dc354874SHong Zhang   }
14521ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1453dc354874SHong Zhang   PetscFunctionReturn(0);
1454dc354874SHong Zhang }
1455