xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision c60f02097ec1bad1d486ea4e6635560ad6843df9)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
4*c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
549b5e25fSSatish Balay #include "petscbt.h"
67c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h"
749b5e25fSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1149b5e25fSSatish Balay {
125eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
136849ba73SBarry Smith   PetscErrorCode ierr;
145d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
155d0c19d7SBarry Smith   const PetscInt *idx;
16521d7252SBarry Smith   PetscBT        table,table0;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19b3bf805bSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23d0f46423SBarry Smith   bs  = A->rmap->bs;
24d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
27d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n ; ++j){
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
48d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
52d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
54efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end ; l++){
61d94109b8SHong Zhang             bcol = aj[l];
62d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
63d94109b8SHong Zhang           }
64d94109b8SHong Zhang           k++;
65d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67d94109b8SHong Zhang           for (l = start; l<end ; l++){
68d94109b8SHong Zhang             bcol = aj[l];
69dbe03f88SHong Zhang             if (bcol > bcol_max) break;
70d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
71d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
72d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
73d94109b8SHong Zhang             }
74d94109b8SHong Zhang           }
75d94109b8SHong Zhang         }
76d94109b8SHong Zhang       }
77d94109b8SHong Zhang     } /* for each overlap */
78d94109b8SHong Zhang 
79d94109b8SHong Zhang     /* expand the Index Set */
80d94109b8SHong Zhang     for (j=0; j<isz; j++) {
81d94109b8SHong Zhang       for (k=0; k<bs; k++)
82d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
83d94109b8SHong Zhang     }
84d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
85d94109b8SHong Zhang   }
86d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
87d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
905eee224dSHong Zhang   PetscFunctionReturn(0);
9149b5e25fSSatish Balay }
9249b5e25fSSatish Balay 
934a2ae208SSatish Balay #undef __FUNCT__
944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
954aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9649b5e25fSSatish Balay {
9749b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
986849ba73SBarry Smith   PetscErrorCode ierr;
9913f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10013f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1015d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1025d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10349b5e25fSSatish Balay   MatScalar      *mat_a;
10449b5e25fSSatish Balay   Mat            C;
10514ca34e6SBarry Smith   PetscTruth     flag,sorted;
10649b5e25fSSatish Balay 
10749b5e25fSSatish Balay   PetscFunctionBegin;
108e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
10914ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
11014ca34e6SBarry Smith   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11149b5e25fSSatish Balay 
11249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11449b5e25fSSatish Balay 
11513f74950SBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11649b5e25fSSatish Balay   ssmap = smap;
11713f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11813f74950SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11949b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12049b5e25fSSatish Balay   /* determine lens of each row */
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12249b5e25fSSatish Balay     kstart  = ai[irow[i]];
12349b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12449b5e25fSSatish Balay     lens[i] = 0;
12549b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12649b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12749b5e25fSSatish Balay           lens[i]++;
12849b5e25fSSatish Balay         }
12949b5e25fSSatish Balay       }
13049b5e25fSSatish Balay     }
13149b5e25fSSatish Balay   /* Create and fill new matrix */
13249b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13349b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13449b5e25fSSatish Balay 
135d0f46423SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13613f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
137abc0a331SBarry Smith     if (!flag) {
138347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13949b5e25fSSatish Balay     }
14013f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14149b5e25fSSatish Balay     C = *B;
14249b5e25fSSatish Balay   } else {
1437adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
144f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1457adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
146ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14749b5e25fSSatish Balay   }
14849b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14949b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15049b5e25fSSatish Balay     row    = irow[i];
15149b5e25fSSatish Balay     kstart = ai[row];
15249b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15349b5e25fSSatish Balay     mat_i  = c->i[i];
15449b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15549b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15649b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15749b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15849b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15949b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16049b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16149b5e25fSSatish Balay         mat_a   += bs2;
16249b5e25fSSatish Balay         (*mat_ilen)++;
16349b5e25fSSatish Balay       }
16449b5e25fSSatish Balay     }
16549b5e25fSSatish Balay   }
16649b5e25fSSatish Balay 
16749b5e25fSSatish Balay   /* Free work space */
16849b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay 
17349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17449b5e25fSSatish Balay   *B = C;
17549b5e25fSSatish Balay   PetscFunctionReturn(0);
17649b5e25fSSatish Balay }
17749b5e25fSSatish Balay 
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1804aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
18149b5e25fSSatish Balay {
18249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18349b5e25fSSatish Balay   IS             is1;
1846849ba73SBarry Smith   PetscErrorCode ierr;
1855d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1865d0c19d7SBarry Smith   const PetscInt *irow;
18749b5e25fSSatish Balay 
18849b5e25fSSatish Balay   PetscFunctionBegin;
189e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
19049b5e25fSSatish Balay 
19149b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19249b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19349b5e25fSSatish Balay 
19449b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19549b5e25fSSatish Balay    and form the IS with compressed IS */
19613f74950SBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
19749b5e25fSSatish Balay   iary = vary + a->mbs;
19813f74950SBarry Smith   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
19949b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
20049b5e25fSSatish Balay 
20149b5e25fSSatish Balay   count = 0;
20249b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
203e005ede5SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20449b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20549b5e25fSSatish Balay   }
20649b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20749b5e25fSSatish Balay 
20849b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20949b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
21049b5e25fSSatish Balay 
2114aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
21249b5e25fSSatish Balay   ISDestroy(is1);
21349b5e25fSSatish Balay   PetscFunctionReturn(0);
21449b5e25fSSatish Balay }
21549b5e25fSSatish Balay 
2164a2ae208SSatish Balay #undef __FUNCT__
2174a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21813f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21949b5e25fSSatish Balay {
2206849ba73SBarry Smith   PetscErrorCode ierr;
22113f74950SBarry Smith   PetscInt       i;
22249b5e25fSSatish Balay 
22349b5e25fSSatish Balay   PetscFunctionBegin;
22449b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22582502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22649b5e25fSSatish Balay   }
22749b5e25fSSatish Balay 
22849b5e25fSSatish Balay   for (i=0; i<n; i++) {
2294aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
23049b5e25fSSatish Balay   }
23149b5e25fSSatish Balay   PetscFunctionReturn(0);
23249b5e25fSSatish Balay }
23349b5e25fSSatish Balay 
23449b5e25fSSatish Balay /* -------------------------------------------------------*/
23549b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23649b5e25fSSatish Balay /* -------------------------------------------------------*/
237d9eff348SSatish Balay #include "petscblaslapack.h"
23849b5e25fSSatish Balay 
2394a2ae208SSatish Balay #undef __FUNCT__
2404a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
241dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
24249b5e25fSSatish Balay {
24349b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
244061b2667SBarry Smith   const PetscScalar *x;
245061b2667SBarry Smith   PetscScalar       *z,x1,sum;
246061b2667SBarry Smith   const MatScalar   *v;
2476849ba73SBarry Smith   PetscErrorCode    ierr;
248061b2667SBarry Smith   PetscInt          mbs=a->mbs,i,j,nz;
249061b2667SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
25049b5e25fSSatish Balay 
25149b5e25fSSatish Balay   PetscFunctionBegin;
252061b2667SBarry Smith   ierr = VecSet(zz,0.0);CHKERRQ(ierr);
253061b2667SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2541ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25549b5e25fSSatish Balay 
25649b5e25fSSatish Balay   v  = a->a;
257061b2667SBarry Smith   ib = aj;
25849b5e25fSSatish Balay 
25949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
260061b2667SBarry Smith     nz   = ai[i+1] - ai[i];        /* length of i_th row of A */
261061b2667SBarry Smith     x1   = x[i];
262061b2667SBarry Smith     sum  = v[0]*x1;                /* diagonal term */
263061b2667SBarry Smith     for (j=1; j<nz; j++) {
264061b2667SBarry Smith       z[ib[j]] += v[j] * x1;       /* (strict lower triangular part of A)*x  */
265061b2667SBarry Smith       sum      += v[j] * x[ib[j]]; /* (strict upper triangular part of A)*x  */
266831a3094SHong Zhang     }
267061b2667SBarry Smith     z[i] += sum;
268061b2667SBarry Smith     v    += nz;
269061b2667SBarry Smith     ib   += nz;
27049b5e25fSSatish Balay   }
27149b5e25fSSatish Balay 
272061b2667SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2731ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
274061b2667SBarry Smith   ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr);
27549b5e25fSSatish Balay   PetscFunctionReturn(0);
27649b5e25fSSatish Balay }
27749b5e25fSSatish Balay 
2784a2ae208SSatish Balay #undef __FUNCT__
2794a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
280dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
28149b5e25fSSatish Balay {
28249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
28387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
28449b5e25fSSatish Balay   MatScalar      *v;
2856849ba73SBarry Smith   PetscErrorCode ierr;
28613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
28798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
28849b5e25fSSatish Balay 
28949b5e25fSSatish Balay   PetscFunctionBegin;
2902dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2911ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2921ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
29349b5e25fSSatish Balay 
29449b5e25fSSatish Balay   v     = a->a;
29549b5e25fSSatish Balay   xb = x;
29649b5e25fSSatish Balay 
29749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
29849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
29949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
30049b5e25fSSatish Balay     ib = aj + *ai;
301831a3094SHong Zhang     jmin = 0;
30298c9bda7SSatish Balay     nonzerorow += (n>0);
3037fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
30449b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
30549b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
306831a3094SHong Zhang       v += 4; jmin++;
3077fbae186SHong Zhang     }
308831a3094SHong Zhang     for (j=jmin; j<n; j++) {
30949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31049b5e25fSSatish Balay       cval       = ib[j]*2;
31149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
31249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
31349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
31449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
31549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
31649b5e25fSSatish Balay       v  += 4;
31749b5e25fSSatish Balay     }
31849b5e25fSSatish Balay     xb +=2; ai++;
31949b5e25fSSatish Balay   }
32049b5e25fSSatish Balay 
3211ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3221ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
323dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
32449b5e25fSSatish Balay   PetscFunctionReturn(0);
32549b5e25fSSatish Balay }
32649b5e25fSSatish Balay 
3274a2ae208SSatish Balay #undef __FUNCT__
3284a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
329dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
33049b5e25fSSatish Balay {
33149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
33287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
33349b5e25fSSatish Balay   MatScalar      *v;
3346849ba73SBarry Smith   PetscErrorCode ierr;
33513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
33698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
33749b5e25fSSatish Balay 
33849b5e25fSSatish Balay   PetscFunctionBegin;
3392dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3401ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3411ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34249b5e25fSSatish Balay 
34349b5e25fSSatish Balay   v    = a->a;
34449b5e25fSSatish Balay   xb   = x;
34549b5e25fSSatish Balay 
34649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
34749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
34849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
34949b5e25fSSatish Balay     ib = aj + *ai;
350831a3094SHong Zhang     jmin = 0;
35198c9bda7SSatish Balay     nonzerorow += (n>0);
3527fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
35349b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
35449b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
35549b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
356831a3094SHong Zhang       v += 9; jmin++;
3577fbae186SHong Zhang     }
358831a3094SHong Zhang     for (j=jmin; j<n; j++) {
35949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
36049b5e25fSSatish Balay       cval       = ib[j]*3;
36149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
36249b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
36349b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
36449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
36549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
36649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
36749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
36849b5e25fSSatish Balay       v  += 9;
36949b5e25fSSatish Balay     }
37049b5e25fSSatish Balay     xb +=3; ai++;
37149b5e25fSSatish Balay   }
37249b5e25fSSatish Balay 
3731ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3741ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
375dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
37649b5e25fSSatish Balay   PetscFunctionReturn(0);
37749b5e25fSSatish Balay }
37849b5e25fSSatish Balay 
3794a2ae208SSatish Balay #undef __FUNCT__
3804a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
381dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
38249b5e25fSSatish Balay {
38349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
38549b5e25fSSatish Balay   MatScalar      *v;
3866849ba73SBarry Smith   PetscErrorCode ierr;
38713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
38898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
38949b5e25fSSatish Balay 
39049b5e25fSSatish Balay   PetscFunctionBegin;
3912dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
39449b5e25fSSatish Balay 
39549b5e25fSSatish Balay   v     = a->a;
39649b5e25fSSatish Balay   xb = x;
39749b5e25fSSatish Balay 
39849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
39949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
40049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
40149b5e25fSSatish Balay     ib = aj + *ai;
402831a3094SHong Zhang     jmin = 0;
40398c9bda7SSatish Balay     nonzerorow += (n>0);
4047fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
40549b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
40649b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
40749b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
40849b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
409831a3094SHong Zhang       v += 16; jmin++;
4107fbae186SHong Zhang     }
411831a3094SHong Zhang     for (j=jmin; j<n; j++) {
41249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
41349b5e25fSSatish Balay       cval       = ib[j]*4;
41449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
41549b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
41649b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
41749b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
41849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
41949b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
42049b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
42149b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
42249b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
42349b5e25fSSatish Balay       v  += 16;
42449b5e25fSSatish Balay     }
42549b5e25fSSatish Balay     xb +=4; ai++;
42649b5e25fSSatish Balay   }
42749b5e25fSSatish Balay 
4281ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4291ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
430dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
43149b5e25fSSatish Balay   PetscFunctionReturn(0);
43249b5e25fSSatish Balay }
43349b5e25fSSatish Balay 
4344a2ae208SSatish Balay #undef __FUNCT__
4354a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
436dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
43749b5e25fSSatish Balay {
43849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
43987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
44049b5e25fSSatish Balay   MatScalar      *v;
4416849ba73SBarry Smith   PetscErrorCode ierr;
44213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
44398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
44449b5e25fSSatish Balay 
44549b5e25fSSatish Balay   PetscFunctionBegin;
4462dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4471ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
44949b5e25fSSatish Balay 
45049b5e25fSSatish Balay   v     = a->a;
45149b5e25fSSatish Balay   xb = x;
45249b5e25fSSatish Balay 
45349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
45449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
45549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
45649b5e25fSSatish Balay     ib = aj + *ai;
457831a3094SHong Zhang     jmin = 0;
45898c9bda7SSatish Balay     nonzerorow += (n>0);
4597fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
46049b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
46149b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
46249b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
46349b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
46449b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
465831a3094SHong Zhang       v += 25; jmin++;
4667fbae186SHong Zhang     }
467831a3094SHong Zhang     for (j=jmin; j<n; j++) {
46849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
46949b5e25fSSatish Balay       cval       = ib[j]*5;
47049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
47149b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
47249b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
47349b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
47449b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
47549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
47649b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
47749b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
47849b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
47949b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
48049b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
48149b5e25fSSatish Balay       v  += 25;
48249b5e25fSSatish Balay     }
48349b5e25fSSatish Balay     xb +=5; ai++;
48449b5e25fSSatish Balay   }
48549b5e25fSSatish Balay 
4861ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4871ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
488dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
48949b5e25fSSatish Balay   PetscFunctionReturn(0);
49049b5e25fSSatish Balay }
49149b5e25fSSatish Balay 
49249b5e25fSSatish Balay 
4934a2ae208SSatish Balay #undef __FUNCT__
4944a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
495dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
49649b5e25fSSatish Balay {
49749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
49887828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
49949b5e25fSSatish Balay   MatScalar      *v;
5006849ba73SBarry Smith   PetscErrorCode ierr;
50113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
50298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
50349b5e25fSSatish Balay 
50449b5e25fSSatish Balay   PetscFunctionBegin;
5052dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5061ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5071ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
50849b5e25fSSatish Balay 
50949b5e25fSSatish Balay   v     = a->a;
51049b5e25fSSatish Balay   xb = x;
51149b5e25fSSatish Balay 
51249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
51349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
51449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
51549b5e25fSSatish Balay     ib = aj + *ai;
516831a3094SHong Zhang     jmin = 0;
51798c9bda7SSatish Balay     nonzerorow += (n>0);
5187fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
51949b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
52049b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
52149b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
52249b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
52349b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
52449b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
525831a3094SHong Zhang       v += 36; jmin++;
5267fbae186SHong Zhang     }
527831a3094SHong Zhang     for (j=jmin; j<n; j++) {
52849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
52949b5e25fSSatish Balay       cval       = ib[j]*6;
53049b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
53149b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
53249b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
53349b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
53449b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
53549b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
53649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
53749b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
53849b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
53949b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
54049b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
54149b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
54249b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
54349b5e25fSSatish Balay       v  += 36;
54449b5e25fSSatish Balay     }
54549b5e25fSSatish Balay     xb +=6; ai++;
54649b5e25fSSatish Balay   }
54749b5e25fSSatish Balay 
5481ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5491ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
550dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
55149b5e25fSSatish Balay   PetscFunctionReturn(0);
55249b5e25fSSatish Balay }
5534a2ae208SSatish Balay #undef __FUNCT__
5544a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
555dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
55649b5e25fSSatish Balay {
55749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
55887828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
55949b5e25fSSatish Balay   MatScalar      *v;
5606849ba73SBarry Smith   PetscErrorCode ierr;
56113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
56298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
56349b5e25fSSatish Balay 
56449b5e25fSSatish Balay   PetscFunctionBegin;
5652dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5661ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5671ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
56849b5e25fSSatish Balay 
56949b5e25fSSatish Balay   v     = a->a;
57049b5e25fSSatish Balay   xb = x;
57149b5e25fSSatish Balay 
57249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
57349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
57449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
57549b5e25fSSatish Balay     ib = aj + *ai;
576831a3094SHong Zhang     jmin = 0;
57798c9bda7SSatish Balay     nonzerorow += (n>0);
5787fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
57949b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
58049b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
58149b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
58249b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
58349b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
58449b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
58549b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
586831a3094SHong Zhang       v += 49; jmin++;
5877fbae186SHong Zhang     }
588831a3094SHong Zhang     for (j=jmin; j<n; j++) {
58949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
59049b5e25fSSatish Balay       cval       = ib[j]*7;
59149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
59249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
59349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
59449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
59549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
59649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
59749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
59849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
59949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
60049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
60149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
60249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
60349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
60449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
60549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
60649b5e25fSSatish Balay       v  += 49;
60749b5e25fSSatish Balay     }
60849b5e25fSSatish Balay     xb +=7; ai++;
60949b5e25fSSatish Balay   }
6101ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6111ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
612dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
61349b5e25fSSatish Balay   PetscFunctionReturn(0);
61449b5e25fSSatish Balay }
61549b5e25fSSatish Balay 
61649b5e25fSSatish Balay /*
61749b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
61849b5e25fSSatish Balay */
6194a2ae208SSatish Balay #undef __FUNCT__
6204a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
621dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
62249b5e25fSSatish Balay {
62349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
62487828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6250b60a74dSHong Zhang   MatScalar      *v;
626dfbe8321SBarry Smith   PetscErrorCode ierr;
627d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
62898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
62949b5e25fSSatish Balay 
63049b5e25fSSatish Balay   PetscFunctionBegin;
6312dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6321ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
63449b5e25fSSatish Balay 
63549b5e25fSSatish Balay   aj   = a->j;
63649b5e25fSSatish Balay   v    = a->a;
63749b5e25fSSatish Balay   ii   = a->i;
63849b5e25fSSatish Balay 
63949b5e25fSSatish Balay   if (!a->mult_work) {
640d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
64149b5e25fSSatish Balay   }
64249b5e25fSSatish Balay   work = a->mult_work;
64349b5e25fSSatish Balay 
64449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
64549b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
64649b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
64798c9bda7SSatish Balay     nonzerorow += (n>0);
64849b5e25fSSatish Balay 
64949b5e25fSSatish Balay     /* upper triangular part */
65049b5e25fSSatish Balay     for (j=0; j<n; j++) {
65149b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
65249b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
65349b5e25fSSatish Balay       workt += bs;
65449b5e25fSSatish Balay     }
65549b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
65649b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
65749b5e25fSSatish Balay 
65849b5e25fSSatish Balay     /* strict lower triangular part */
659831a3094SHong Zhang     idx = aj+ii[0];
660831a3094SHong Zhang     if (*idx == i){
66196b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
662831a3094SHong Zhang     }
66396b9376eSHong Zhang 
66449b5e25fSSatish Balay     if (ncols > 0){
66549b5e25fSSatish Balay       workt = work;
66687828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
667831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
668831a3094SHong Zhang       for (j=0; j<n; j++) {
669831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
67049b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
67149b5e25fSSatish Balay         workt += bs;
67249b5e25fSSatish Balay       }
67349b5e25fSSatish Balay     }
67449b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
67549b5e25fSSatish Balay   }
67649b5e25fSSatish Balay 
6771ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6781ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
679dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
68049b5e25fSSatish Balay   PetscFunctionReturn(0);
68149b5e25fSSatish Balay }
68249b5e25fSSatish Balay 
6836a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6844a2ae208SSatish Balay #undef __FUNCT__
6854a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
686dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
68749b5e25fSSatish Balay {
68849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
689bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
69049b5e25fSSatish Balay   MatScalar      *v;
6916849ba73SBarry Smith   PetscErrorCode ierr;
69213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
69398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
69449b5e25fSSatish Balay 
69549b5e25fSSatish Balay   PetscFunctionBegin;
6966a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6971ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
69949b5e25fSSatish Balay   v  = a->a;
70049b5e25fSSatish Balay   xb = x;
70149b5e25fSSatish Balay 
70249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70349b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
70449b5e25fSSatish Balay     x1 = xb[0];
70549b5e25fSSatish Balay     ib = aj + *ai;
706831a3094SHong Zhang     jmin = 0;
70798c9bda7SSatish Balay     nonzerorow += (n>0);
708831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
709831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
710831a3094SHong Zhang     }
711831a3094SHong Zhang     for (j=jmin; j<n; j++) {
71249b5e25fSSatish Balay       cval    = *ib;
71349b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
71449b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
71549b5e25fSSatish Balay     }
71649b5e25fSSatish Balay     xb++; ai++;
71749b5e25fSSatish Balay   }
71849b5e25fSSatish Balay 
7191ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
720bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
72149b5e25fSSatish Balay 
722dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
72349b5e25fSSatish Balay   PetscFunctionReturn(0);
72449b5e25fSSatish Balay }
72549b5e25fSSatish Balay 
7264a2ae208SSatish Balay #undef __FUNCT__
7274a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
728dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
72949b5e25fSSatish Balay {
73049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
731bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
73249b5e25fSSatish Balay   MatScalar      *v;
7336849ba73SBarry Smith   PetscErrorCode ierr;
73413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
73598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
73649b5e25fSSatish Balay 
73749b5e25fSSatish Balay   PetscFunctionBegin;
7386a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7391ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7401ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
74149b5e25fSSatish Balay 
74249b5e25fSSatish Balay   v  = a->a;
74349b5e25fSSatish Balay   xb = x;
74449b5e25fSSatish Balay 
74549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
74649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
74749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
74849b5e25fSSatish Balay     ib = aj + *ai;
749831a3094SHong Zhang     jmin = 0;
75098c9bda7SSatish Balay     nonzerorow += (n>0);
7517fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
75249b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
75349b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
754831a3094SHong Zhang       v += 4; jmin++;
7557fbae186SHong Zhang     }
756831a3094SHong Zhang     for (j=jmin; j<n; j++) {
75749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
75849b5e25fSSatish Balay       cval       = ib[j]*2;
75949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
76049b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
76149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
76249b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
76349b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
76449b5e25fSSatish Balay       v  += 4;
76549b5e25fSSatish Balay     }
76649b5e25fSSatish Balay     xb +=2; ai++;
76749b5e25fSSatish Balay   }
7681ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
769bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
77049b5e25fSSatish Balay 
771dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
77249b5e25fSSatish Balay   PetscFunctionReturn(0);
77349b5e25fSSatish Balay }
77449b5e25fSSatish Balay 
7754a2ae208SSatish Balay #undef __FUNCT__
7764a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
777dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
77849b5e25fSSatish Balay {
77949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
780bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
78149b5e25fSSatish Balay   MatScalar      *v;
7826849ba73SBarry Smith   PetscErrorCode ierr;
78313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
78498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
78549b5e25fSSatish Balay 
78649b5e25fSSatish Balay   PetscFunctionBegin;
7876a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7881ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7891ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
79049b5e25fSSatish Balay 
79149b5e25fSSatish Balay   v     = a->a;
79249b5e25fSSatish Balay   xb = x;
79349b5e25fSSatish Balay 
79449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
79549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
79649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
79749b5e25fSSatish Balay     ib = aj + *ai;
798831a3094SHong Zhang     jmin = 0;
79998c9bda7SSatish Balay     nonzerorow += (n>0);
8007fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
80149b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
80249b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
80349b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
804831a3094SHong Zhang      v += 9; jmin++;
8057fbae186SHong Zhang     }
806831a3094SHong Zhang     for (j=jmin; j<n; j++) {
80749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
80849b5e25fSSatish Balay       cval       = ib[j]*3;
80949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
81049b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
81149b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
81249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
81349b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
81449b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
81549b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
81649b5e25fSSatish Balay       v  += 9;
81749b5e25fSSatish Balay     }
81849b5e25fSSatish Balay     xb +=3; ai++;
81949b5e25fSSatish Balay   }
82049b5e25fSSatish Balay 
8211ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
822bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
82349b5e25fSSatish Balay 
824dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
82549b5e25fSSatish Balay   PetscFunctionReturn(0);
82649b5e25fSSatish Balay }
82749b5e25fSSatish Balay 
8284a2ae208SSatish Balay #undef __FUNCT__
8294a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
830dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
83149b5e25fSSatish Balay {
83249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
833bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
83449b5e25fSSatish Balay   MatScalar      *v;
8356849ba73SBarry Smith   PetscErrorCode ierr;
83613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
83798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
83849b5e25fSSatish Balay 
83949b5e25fSSatish Balay   PetscFunctionBegin;
8406a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8411ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8421ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
84349b5e25fSSatish Balay 
84449b5e25fSSatish Balay   v     = a->a;
84549b5e25fSSatish Balay   xb = x;
84649b5e25fSSatish Balay 
84749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
84849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
84949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
85049b5e25fSSatish Balay     ib = aj + *ai;
851831a3094SHong Zhang     jmin = 0;
85298c9bda7SSatish Balay     nonzerorow += (n>0);
8537fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
85449b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
85549b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
85649b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
85749b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
858831a3094SHong Zhang       v += 16; jmin++;
8597fbae186SHong Zhang     }
860831a3094SHong Zhang     for (j=jmin; j<n; j++) {
86149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
86249b5e25fSSatish Balay       cval       = ib[j]*4;
86349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
86449b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
86549b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
86649b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
86749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
86849b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
86949b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
87049b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
87149b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
87249b5e25fSSatish Balay       v  += 16;
87349b5e25fSSatish Balay     }
87449b5e25fSSatish Balay     xb +=4; ai++;
87549b5e25fSSatish Balay   }
87649b5e25fSSatish Balay 
8771ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
878bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
87949b5e25fSSatish Balay 
880dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
88149b5e25fSSatish Balay   PetscFunctionReturn(0);
88249b5e25fSSatish Balay }
88349b5e25fSSatish Balay 
8844a2ae208SSatish Balay #undef __FUNCT__
8854a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
886dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
88749b5e25fSSatish Balay {
88849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
889bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
89049b5e25fSSatish Balay   MatScalar      *v;
8916849ba73SBarry Smith   PetscErrorCode ierr;
89213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
89398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
89449b5e25fSSatish Balay 
89549b5e25fSSatish Balay   PetscFunctionBegin;
8966a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8971ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
89949b5e25fSSatish Balay 
90049b5e25fSSatish Balay   v     = a->a;
90149b5e25fSSatish Balay   xb = x;
90249b5e25fSSatish Balay 
90349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
90449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
90549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
90649b5e25fSSatish Balay     ib = aj + *ai;
907831a3094SHong Zhang     jmin = 0;
90898c9bda7SSatish Balay     nonzerorow += (n>0);
9097fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
91049b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
91149b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
91249b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
91349b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
91449b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
915831a3094SHong Zhang       v += 25; jmin++;
9167fbae186SHong Zhang     }
917831a3094SHong Zhang     for (j=jmin; j<n; j++) {
91849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
91949b5e25fSSatish Balay       cval       = ib[j]*5;
92049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
92149b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
92249b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
92349b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
92449b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
92549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
92649b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
92749b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
92849b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
92949b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
93049b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
93149b5e25fSSatish Balay       v  += 25;
93249b5e25fSSatish Balay     }
93349b5e25fSSatish Balay     xb +=5; ai++;
93449b5e25fSSatish Balay   }
93549b5e25fSSatish Balay 
9361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
937bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
93849b5e25fSSatish Balay 
939dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
94049b5e25fSSatish Balay   PetscFunctionReturn(0);
94149b5e25fSSatish Balay }
9424a2ae208SSatish Balay #undef __FUNCT__
9434a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
944dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
94549b5e25fSSatish Balay {
94649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
947bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
94849b5e25fSSatish Balay   MatScalar      *v;
9496849ba73SBarry Smith   PetscErrorCode ierr;
95013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
95198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
95249b5e25fSSatish Balay 
95349b5e25fSSatish Balay   PetscFunctionBegin;
9546a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9551ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9561ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
95749b5e25fSSatish Balay 
95849b5e25fSSatish Balay   v     = a->a;
95949b5e25fSSatish Balay   xb = x;
96049b5e25fSSatish Balay 
96149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
96249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
96349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
96449b5e25fSSatish Balay     ib = aj + *ai;
965831a3094SHong Zhang     jmin = 0;
96698c9bda7SSatish Balay     nonzerorow += (n>0);
9677fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
96849b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
96949b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
97049b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
97149b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
97249b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
97349b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
974831a3094SHong Zhang       v += 36; jmin++;
9757fbae186SHong Zhang     }
976831a3094SHong Zhang     for (j=jmin; j<n; j++) {
97749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
97849b5e25fSSatish Balay       cval       = ib[j]*6;
97949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
98049b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
98149b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
98249b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
98349b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
98449b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
98549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
98649b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
98749b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
98849b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
98949b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
99049b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
99149b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
99249b5e25fSSatish Balay       v  += 36;
99349b5e25fSSatish Balay     }
99449b5e25fSSatish Balay     xb +=6; ai++;
99549b5e25fSSatish Balay   }
99649b5e25fSSatish Balay 
9971ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
998bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
99949b5e25fSSatish Balay 
1000dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
100149b5e25fSSatish Balay   PetscFunctionReturn(0);
100249b5e25fSSatish Balay }
100349b5e25fSSatish Balay 
10044a2ae208SSatish Balay #undef __FUNCT__
10054a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
1006dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
100749b5e25fSSatish Balay {
100849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1009bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
101049b5e25fSSatish Balay   MatScalar      *v;
10116849ba73SBarry Smith   PetscErrorCode ierr;
101213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
101398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
101449b5e25fSSatish Balay 
101549b5e25fSSatish Balay   PetscFunctionBegin;
10166a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10171ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
10181ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
101949b5e25fSSatish Balay 
102049b5e25fSSatish Balay   v     = a->a;
102149b5e25fSSatish Balay   xb = x;
102249b5e25fSSatish Balay 
102349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
102449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
102549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
102649b5e25fSSatish Balay     ib = aj + *ai;
1027831a3094SHong Zhang     jmin = 0;
102898c9bda7SSatish Balay     nonzerorow += (n>0);
10297fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
103049b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
103149b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
103249b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
103349b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
103449b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
103549b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
103649b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1037831a3094SHong Zhang       v += 49; jmin++;
10387fbae186SHong Zhang     }
1039831a3094SHong Zhang     for (j=jmin; j<n; j++) {
104049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
104149b5e25fSSatish Balay       cval       = ib[j]*7;
104249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
104349b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
104449b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
104549b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
104649b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
104749b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
104849b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
104949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
105049b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
105149b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
105249b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
105349b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
105449b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
105549b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
105649b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
105749b5e25fSSatish Balay       v  += 49;
105849b5e25fSSatish Balay     }
105949b5e25fSSatish Balay     xb +=7; ai++;
106049b5e25fSSatish Balay   }
106149b5e25fSSatish Balay 
10621ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1063bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
106449b5e25fSSatish Balay 
1065dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
106649b5e25fSSatish Balay   PetscFunctionReturn(0);
106749b5e25fSSatish Balay }
106849b5e25fSSatish Balay 
10694a2ae208SSatish Balay #undef __FUNCT__
10704a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1071dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
107249b5e25fSSatish Balay {
107349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1074bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1075066653e3SSatish Balay   MatScalar      *v;
1076dfbe8321SBarry Smith   PetscErrorCode ierr;
1077d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
107898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
107949b5e25fSSatish Balay 
108049b5e25fSSatish Balay   PetscFunctionBegin;
10816a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10821ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10831ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
108449b5e25fSSatish Balay 
108549b5e25fSSatish Balay   aj   = a->j;
108649b5e25fSSatish Balay   v    = a->a;
108749b5e25fSSatish Balay   ii   = a->i;
108849b5e25fSSatish Balay 
108949b5e25fSSatish Balay   if (!a->mult_work) {
1090d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
109149b5e25fSSatish Balay   }
109249b5e25fSSatish Balay   work = a->mult_work;
109349b5e25fSSatish Balay 
109449b5e25fSSatish Balay 
109549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
109649b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
109749b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
109898c9bda7SSatish Balay     nonzerorow += (n>0);
109949b5e25fSSatish Balay 
110049b5e25fSSatish Balay     /* upper triangular part */
110149b5e25fSSatish Balay     for (j=0; j<n; j++) {
110249b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
110349b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
110449b5e25fSSatish Balay       workt += bs;
110549b5e25fSSatish Balay     }
110649b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
110749b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
110849b5e25fSSatish Balay 
110949b5e25fSSatish Balay     /* strict lower triangular part */
1110831a3094SHong Zhang     idx = aj+ii[0];
1111831a3094SHong Zhang     if (*idx == i){
111296b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1113831a3094SHong Zhang     }
111449b5e25fSSatish Balay     if (ncols > 0){
111549b5e25fSSatish Balay       workt = work;
111687828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1117831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1118831a3094SHong Zhang       for (j=0; j<n; j++) {
1119831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
112049b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
112149b5e25fSSatish Balay         workt += bs;
112249b5e25fSSatish Balay       }
112349b5e25fSSatish Balay     }
112449b5e25fSSatish Balay 
112549b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
112649b5e25fSSatish Balay   }
112749b5e25fSSatish Balay 
11281ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1129bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
113049b5e25fSSatish Balay 
1131dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
113249b5e25fSSatish Balay   PetscFunctionReturn(0);
113349b5e25fSSatish Balay }
113449b5e25fSSatish Balay 
11354a2ae208SSatish Balay #undef __FUNCT__
11364a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1137f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
113849b5e25fSSatish Balay {
113949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1140f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1141efee365bSSatish Balay   PetscErrorCode ierr;
11420805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
114349b5e25fSSatish Balay 
114449b5e25fSSatish Balay   PetscFunctionBegin;
1145f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1146efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
114749b5e25fSSatish Balay   PetscFunctionReturn(0);
114849b5e25fSSatish Balay }
114949b5e25fSSatish Balay 
11504a2ae208SSatish Balay #undef __FUNCT__
11514a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1152dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
115349b5e25fSSatish Balay {
115449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
115549b5e25fSSatish Balay   MatScalar      *v = a->a;
115649b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1157d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11586849ba73SBarry Smith   PetscErrorCode ierr;
115913f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
116049b5e25fSSatish Balay 
116149b5e25fSSatish Balay   PetscFunctionBegin;
116249b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
116349b5e25fSSatish Balay     for (k=0; k<mbs; k++){
116449b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1165831a3094SHong Zhang       col  = aj + jmin;
1166831a3094SHong Zhang       if (*col == k){         /* diagonal block */
116749b5e25fSSatish Balay         for (i=0; i<bs2; i++){
116849b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
116949b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
117049b5e25fSSatish Balay #else
117149b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
117249b5e25fSSatish Balay #endif
117349b5e25fSSatish Balay         }
1174831a3094SHong Zhang         jmin++;
1175831a3094SHong Zhang       }
1176831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
117749b5e25fSSatish Balay         for (i=0; i<bs2; i++){
117849b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
117949b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
118049b5e25fSSatish Balay #else
118149b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
118249b5e25fSSatish Balay #endif
118349b5e25fSSatish Balay         }
118449b5e25fSSatish Balay       }
118549b5e25fSSatish Balay     }
118649b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11870b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
11880b8dc8d2SHong Zhang     ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr);
11890b8dc8d2SHong Zhang     jl   = il + mbs;
11900b8dc8d2SHong Zhang     sum  = (PetscReal*)(jl + mbs);
11910b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11920b8dc8d2SHong Zhang     il[0] = 0;
119349b5e25fSSatish Balay 
119449b5e25fSSatish Balay     *norm = 0.0;
119549b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
119649b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
119749b5e25fSSatish Balay       /*-- col sum --*/
119849b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1199831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1200831a3094SHong Zhang                   at step k */
120149b5e25fSSatish Balay       while (i<mbs){
120249b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
120349b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
120449b5e25fSSatish Balay         for (j=0; j<bs; j++){
120549b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
120649b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
120749b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
120849b5e25fSSatish Balay           }
120949b5e25fSSatish Balay         }
121049b5e25fSSatish Balay         /* update il, jl */
1211831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1212831a3094SHong Zhang         jmax = a->i[i+1];
121349b5e25fSSatish Balay         if (jmin < jmax){
121449b5e25fSSatish Balay           il[i] = jmin;
121549b5e25fSSatish Balay           j   = a->j[jmin];
121649b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
121749b5e25fSSatish Balay         }
121849b5e25fSSatish Balay         i = nexti;
121949b5e25fSSatish Balay       }
122049b5e25fSSatish Balay       /*-- row sum --*/
122149b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
122249b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
122349b5e25fSSatish Balay         for (j=0; j<bs; j++){
122449b5e25fSSatish Balay           v = a->a + i*bs2 + j;
122549b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
12260b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
122749b5e25fSSatish Balay           }
122849b5e25fSSatish Balay         }
122949b5e25fSSatish Balay       }
123049b5e25fSSatish Balay       /* add k_th block row to il, jl */
1231831a3094SHong Zhang       col = aj+jmin;
1232831a3094SHong Zhang       if (*col == k) jmin++;
123349b5e25fSSatish Balay       if (jmin < jmax){
123449b5e25fSSatish Balay         il[k] = jmin;
12350b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
123649b5e25fSSatish Balay       }
123749b5e25fSSatish Balay       for (j=0; j<bs; j++){
123849b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
123949b5e25fSSatish Balay       }
124049b5e25fSSatish Balay     }
124149b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
124249b5e25fSSatish Balay   } else {
1243347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
124449b5e25fSSatish Balay   }
124549b5e25fSSatish Balay   PetscFunctionReturn(0);
124649b5e25fSSatish Balay }
124749b5e25fSSatish Balay 
12484a2ae208SSatish Balay #undef __FUNCT__
12494a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1250dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
125149b5e25fSSatish Balay {
125249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1253dfbe8321SBarry Smith   PetscErrorCode ierr;
125449b5e25fSSatish Balay 
125549b5e25fSSatish Balay   PetscFunctionBegin;
125649b5e25fSSatish Balay 
125749b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1258d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1259ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1260ef511fbeSHong Zhang     PetscFunctionReturn(0);
126149b5e25fSSatish Balay   }
126249b5e25fSSatish Balay 
126349b5e25fSSatish Balay   /* if the a->i are the same */
126413f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1265abc0a331SBarry Smith   if (!*flg) {
126649b5e25fSSatish Balay     PetscFunctionReturn(0);
126749b5e25fSSatish Balay   }
126849b5e25fSSatish Balay 
126949b5e25fSSatish Balay   /* if a->j are the same */
127013f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1271abc0a331SBarry Smith   if (!*flg) {
127249b5e25fSSatish Balay     PetscFunctionReturn(0);
127349b5e25fSSatish Balay   }
127449b5e25fSSatish Balay   /* if a->a are the same */
1275d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1276935af2e7SHong Zhang   PetscFunctionReturn(0);
127749b5e25fSSatish Balay }
127849b5e25fSSatish Balay 
12794a2ae208SSatish Balay #undef __FUNCT__
12804a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1281dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
128249b5e25fSSatish Balay {
128349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1284dfbe8321SBarry Smith   PetscErrorCode ierr;
128513f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
128687828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
128749b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
128849b5e25fSSatish Balay 
128949b5e25fSSatish Balay   PetscFunctionBegin;
1290d0f46423SBarry Smith   bs   = A->rmap->bs;
129182799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
129282799104SHong Zhang 
129349b5e25fSSatish Balay   aa   = a->a;
129449b5e25fSSatish Balay   ai   = a->i;
129549b5e25fSSatish Balay   aj   = a->j;
129649b5e25fSSatish Balay   ambs = a->mbs;
129749b5e25fSSatish Balay   bs2  = a->bs2;
129849b5e25fSSatish Balay 
12992dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
13001ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
130149b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1302d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
130349b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
130449b5e25fSSatish Balay     j=ai[i];
130549b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
130649b5e25fSSatish Balay       row  = i*bs;
130749b5e25fSSatish Balay       aa_j = aa + j*bs2;
130882799104SHong Zhang       if (A->factor && bs==1){
130982799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
131082799104SHong Zhang       } else {
131149b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
131249b5e25fSSatish Balay       }
131349b5e25fSSatish Balay     }
131482799104SHong Zhang   }
131582799104SHong Zhang 
13161ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
131749b5e25fSSatish Balay   PetscFunctionReturn(0);
131849b5e25fSSatish Balay }
131949b5e25fSSatish Balay 
13204a2ae208SSatish Balay #undef __FUNCT__
13214a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1322dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
132349b5e25fSSatish Balay {
132449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
13255e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
132649b5e25fSSatish Balay   MatScalar      *aa,*v;
1327dfbe8321SBarry Smith   PetscErrorCode ierr;
13285e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1329b3bf805bSHong Zhang   PetscTruth     flg;
133049b5e25fSSatish Balay 
133149b5e25fSSatish Balay   PetscFunctionBegin;
1332b3bf805bSHong Zhang   if (ll != rr){
1333b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1334b3bf805bSHong Zhang     if (!flg)
1335b3bf805bSHong Zhang       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1336b3bf805bSHong Zhang   }
1337b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
133849b5e25fSSatish Balay   ai  = a->i;
133949b5e25fSSatish Balay   aj  = a->j;
134049b5e25fSSatish Balay   aa  = a->a;
1341d0f46423SBarry Smith   m   = A->rmap->N;
1342d0f46423SBarry Smith   bs  = A->rmap->bs;
134349b5e25fSSatish Balay   mbs = a->mbs;
134449b5e25fSSatish Balay   bs2 = a->bs2;
134549b5e25fSSatish Balay 
13461ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
134749b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1348347d480fSBarry Smith   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
134949b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
135049b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
135149b5e25fSSatish Balay     li = l + i*bs;
135249b5e25fSSatish Balay     v  = aa + bs2*ai[i];
135349b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
135449b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13555e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
135649b5e25fSSatish Balay         x = ri[k];
135749b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
135849b5e25fSSatish Balay       }
135949b5e25fSSatish Balay     }
136049b5e25fSSatish Balay   }
13611ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1362dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
136349b5e25fSSatish Balay   PetscFunctionReturn(0);
136449b5e25fSSatish Balay }
136549b5e25fSSatish Balay 
13664a2ae208SSatish Balay #undef __FUNCT__
13674a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1368dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
136949b5e25fSSatish Balay {
137049b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137149b5e25fSSatish Balay 
137249b5e25fSSatish Balay   PetscFunctionBegin;
137349b5e25fSSatish Balay   info->block_size     = a->bs2;
13746c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13756c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
137649b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
137749b5e25fSSatish Balay   info->assemblies   = A->num_ass;
137849b5e25fSSatish Balay   info->mallocs      = a->reallocs;
13797adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
138049b5e25fSSatish Balay   if (A->factor) {
138149b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
138249b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
138349b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
138449b5e25fSSatish Balay   } else {
138549b5e25fSSatish Balay     info->fill_ratio_given  = 0;
138649b5e25fSSatish Balay     info->fill_ratio_needed = 0;
138749b5e25fSSatish Balay     info->factor_mallocs    = 0;
138849b5e25fSSatish Balay   }
138949b5e25fSSatish Balay   PetscFunctionReturn(0);
139049b5e25fSSatish Balay }
139149b5e25fSSatish Balay 
139249b5e25fSSatish Balay 
13934a2ae208SSatish Balay #undef __FUNCT__
13944a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1395dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
139649b5e25fSSatish Balay {
139749b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1398dfbe8321SBarry Smith   PetscErrorCode ierr;
139949b5e25fSSatish Balay 
140049b5e25fSSatish Balay   PetscFunctionBegin;
140149b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
140249b5e25fSSatish Balay   PetscFunctionReturn(0);
140349b5e25fSSatish Balay }
1404dc354874SHong Zhang 
14054a2ae208SSatish Balay #undef __FUNCT__
1406985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1407985db425SBarry Smith /*
1408985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1409985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1410985db425SBarry Smith */
1411985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1412dc354874SHong Zhang {
1413dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1414dfbe8321SBarry Smith   PetscErrorCode ierr;
141513f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1416c3fca9a7SHong Zhang   PetscReal      atmp;
1417273d9f13SBarry Smith   MatScalar      *aa;
1418985db425SBarry Smith   PetscScalar    *x;
141913f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1420dc354874SHong Zhang 
1421dc354874SHong Zhang   PetscFunctionBegin;
1422985db425SBarry Smith   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1423dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1424d0f46423SBarry Smith   bs   = A->rmap->bs;
1425dc354874SHong Zhang   aa   = a->a;
1426dc354874SHong Zhang   ai   = a->i;
1427dc354874SHong Zhang   aj   = a->j;
142844117c81SHong Zhang   mbs = a->mbs;
1429dc354874SHong Zhang 
1430985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14311ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1432dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1433d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
143444117c81SHong Zhang   for (i=0; i<mbs; i++) {
1435d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1436d0f6400bSHong Zhang     brow  = bs*i;
143744117c81SHong Zhang     for (j=0; j<ncols; j++){
1438d0f6400bSHong Zhang       bcol = bs*(*aj);
143944117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1440d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
144144117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1442d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1443d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1444c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1445c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
144644117c81SHong Zhang         }
144744117c81SHong Zhang       }
1448d0f6400bSHong Zhang       aj++;
1449dc354874SHong Zhang     }
1450dc354874SHong Zhang   }
14511ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1452dc354874SHong Zhang   PetscFunctionReturn(0);
1453dc354874SHong Zhang }
1454