xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision d0f46423f772d871f92d805d83c5bf0c050e33b4)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
449b5e25fSSatish Balay #include "src/inline/spops.h"
549b5e25fSSatish Balay #include "src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
15521d7252SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
16521d7252SBarry Smith   PetscBT        table,table0;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19b3bf805bSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23*d0f46423SBarry Smith   bs  = A->rmap->bs;
24d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26*d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
27d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n ; ++j){
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
48d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
52d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
54efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end ; l++){
61d94109b8SHong Zhang             bcol = aj[l];
62d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
63d94109b8SHong Zhang           }
64d94109b8SHong Zhang           k++;
65d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67d94109b8SHong Zhang           for (l = start; l<end ; l++){
68d94109b8SHong Zhang             bcol = aj[l];
69dbe03f88SHong Zhang             if (bcol > bcol_max) break;
70d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
71d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
72d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
73d94109b8SHong Zhang             }
74d94109b8SHong Zhang           }
75d94109b8SHong Zhang         }
76d94109b8SHong Zhang       }
77d94109b8SHong Zhang     } /* for each overlap */
78d94109b8SHong Zhang 
79d94109b8SHong Zhang     /* expand the Index Set */
80d94109b8SHong Zhang     for (j=0; j<isz; j++) {
81d94109b8SHong Zhang       for (k=0; k<bs; k++)
82d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
83d94109b8SHong Zhang     }
84d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
85d94109b8SHong Zhang   }
86d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
87d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
905eee224dSHong Zhang   PetscFunctionReturn(0);
9149b5e25fSSatish Balay }
9249b5e25fSSatish Balay 
934a2ae208SSatish Balay #undef __FUNCT__
944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
9513f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
9649b5e25fSSatish Balay {
9749b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
986849ba73SBarry Smith   PetscErrorCode ierr;
9913f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10013f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
101*d0f46423SBarry Smith   PetscInt       *irow,nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
10213f74950SBarry Smith   PetscInt       *aj = a->j,*ai = a->i;
10349b5e25fSSatish Balay   MatScalar      *mat_a;
10449b5e25fSSatish Balay   Mat            C;
10549b5e25fSSatish Balay   PetscTruth     flag;
10649b5e25fSSatish Balay 
10749b5e25fSSatish Balay   PetscFunctionBegin;
108e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
10949b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
110347d480fSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11149b5e25fSSatish Balay 
11249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11449b5e25fSSatish Balay 
11513f74950SBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11649b5e25fSSatish Balay   ssmap = smap;
11713f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11813f74950SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11949b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12049b5e25fSSatish Balay   /* determine lens of each row */
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12249b5e25fSSatish Balay     kstart  = ai[irow[i]];
12349b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12449b5e25fSSatish Balay     lens[i] = 0;
12549b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12649b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12749b5e25fSSatish Balay           lens[i]++;
12849b5e25fSSatish Balay         }
12949b5e25fSSatish Balay       }
13049b5e25fSSatish Balay     }
13149b5e25fSSatish Balay   /* Create and fill new matrix */
13249b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13349b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13449b5e25fSSatish Balay 
135*d0f46423SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13613f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
137abc0a331SBarry Smith     if (!flag) {
138347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13949b5e25fSSatish Balay     }
14013f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14149b5e25fSSatish Balay     C = *B;
14249b5e25fSSatish Balay   } else {
1437adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
144f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1457adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
146ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14749b5e25fSSatish Balay   }
14849b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14949b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15049b5e25fSSatish Balay     row    = irow[i];
15149b5e25fSSatish Balay     kstart = ai[row];
15249b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15349b5e25fSSatish Balay     mat_i  = c->i[i];
15449b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15549b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15649b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15749b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15849b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15949b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16049b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16149b5e25fSSatish Balay         mat_a   += bs2;
16249b5e25fSSatish Balay         (*mat_ilen)++;
16349b5e25fSSatish Balay       }
16449b5e25fSSatish Balay     }
16549b5e25fSSatish Balay   }
16649b5e25fSSatish Balay 
16749b5e25fSSatish Balay   /* Free work space */
16849b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay 
17349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17449b5e25fSSatish Balay   *B = C;
17549b5e25fSSatish Balay   PetscFunctionReturn(0);
17649b5e25fSSatish Balay }
17749b5e25fSSatish Balay 
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
18013f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
18149b5e25fSSatish Balay {
18249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18349b5e25fSSatish Balay   IS             is1;
1846849ba73SBarry Smith   PetscErrorCode ierr;
185*d0f46423SBarry Smith   PetscInt       *vary,*iary,*irow,nrows,i,bs=A->rmap->bs,count;
18649b5e25fSSatish Balay 
18749b5e25fSSatish Balay   PetscFunctionBegin;
188e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
18949b5e25fSSatish Balay 
19049b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19149b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19449b5e25fSSatish Balay    and form the IS with compressed IS */
19513f74950SBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
19649b5e25fSSatish Balay   iary = vary + a->mbs;
19713f74950SBarry Smith   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
19849b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19949b5e25fSSatish Balay 
20049b5e25fSSatish Balay   count = 0;
20149b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
202e005ede5SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20349b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20449b5e25fSSatish Balay   }
20549b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20649b5e25fSSatish Balay 
20749b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20849b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
20949b5e25fSSatish Balay 
21049b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
21149b5e25fSSatish Balay   ISDestroy(is1);
21249b5e25fSSatish Balay   PetscFunctionReturn(0);
21349b5e25fSSatish Balay }
21449b5e25fSSatish Balay 
2154a2ae208SSatish Balay #undef __FUNCT__
2164a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21713f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21849b5e25fSSatish Balay {
2196849ba73SBarry Smith   PetscErrorCode ierr;
22013f74950SBarry Smith   PetscInt       i;
22149b5e25fSSatish Balay 
22249b5e25fSSatish Balay   PetscFunctionBegin;
22349b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22482502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22549b5e25fSSatish Balay   }
22649b5e25fSSatish Balay 
22749b5e25fSSatish Balay   for (i=0; i<n; i++) {
22849b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
22949b5e25fSSatish Balay   }
23049b5e25fSSatish Balay   PetscFunctionReturn(0);
23149b5e25fSSatish Balay }
23249b5e25fSSatish Balay 
23349b5e25fSSatish Balay /* -------------------------------------------------------*/
23449b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23549b5e25fSSatish Balay /* -------------------------------------------------------*/
236d9eff348SSatish Balay #include "petscblaslapack.h"
23749b5e25fSSatish Balay 
2384a2ae208SSatish Balay #undef __FUNCT__
2394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
240dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
24149b5e25fSSatish Balay {
24249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,zero=0.0;
24449b5e25fSSatish Balay   MatScalar      *v;
2456849ba73SBarry Smith   PetscErrorCode ierr;
24613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24849b5e25fSSatish Balay 
24949b5e25fSSatish Balay   PetscFunctionBegin;
2502dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2521ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25349b5e25fSSatish Balay 
25449b5e25fSSatish Balay   v  = a->a;
25549b5e25fSSatish Balay   xb = x;
25649b5e25fSSatish Balay 
25749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25849b5e25fSSatish Balay     n    = ai[1] - ai[0];  /* length of i_th row of A */
2593cb60589SBarry Smith     x1   = *xb++;
2603cb60589SBarry Smith     ib   = aj + *ai++;
261831a3094SHong Zhang     jmin = 0;
26298c9bda7SSatish Balay     nonzerorow += (n>0);
2633cb60589SBarry Smith     /* if we ALWAYS required a diagonal entry then could remove this if test */
2643cb60589SBarry Smith     /* should we use a tmp to hold the accumulated z[i] */
265831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
266831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
267831a3094SHong Zhang       jmin++;
268831a3094SHong Zhang     }
269831a3094SHong Zhang     for (j=jmin; j<n; j++) {
27049b5e25fSSatish Balay       cval    = *ib;
27149b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
27249b5e25fSSatish Balay       z[i]    += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
27349b5e25fSSatish Balay     }
27449b5e25fSSatish Balay   }
27549b5e25fSSatish Balay 
2761ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2771ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
27898c9bda7SSatish Balay   ierr = PetscLogFlops(2*(a->nz*2 - nonzerorow) - nonzerorow);CHKERRQ(ierr);  /* nz = (nz+m)/2 */
27949b5e25fSSatish Balay   PetscFunctionReturn(0);
28049b5e25fSSatish Balay }
28149b5e25fSSatish Balay 
2824a2ae208SSatish Balay #undef __FUNCT__
2834a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
284dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
28549b5e25fSSatish Balay {
28649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
28787828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
28849b5e25fSSatish Balay   MatScalar      *v;
2896849ba73SBarry Smith   PetscErrorCode ierr;
29013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29249b5e25fSSatish Balay 
29349b5e25fSSatish Balay   PetscFunctionBegin;
2942dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2951ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2961ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
29749b5e25fSSatish Balay 
29849b5e25fSSatish Balay   v     = a->a;
29949b5e25fSSatish Balay   xb = x;
30049b5e25fSSatish Balay 
30149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
30449b5e25fSSatish Balay     ib = aj + *ai;
305831a3094SHong Zhang     jmin = 0;
30698c9bda7SSatish Balay     nonzerorow += (n>0);
3077fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
30849b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
30949b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
310831a3094SHong Zhang       v += 4; jmin++;
3117fbae186SHong Zhang     }
312831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31449b5e25fSSatish Balay       cval       = ib[j]*2;
31549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
31649b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
31749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
31849b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
31949b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
32049b5e25fSSatish Balay       v  += 4;
32149b5e25fSSatish Balay     }
32249b5e25fSSatish Balay     xb +=2; ai++;
32349b5e25fSSatish Balay   }
32449b5e25fSSatish Balay 
3251ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3261ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
32798c9bda7SSatish Balay   ierr = PetscLogFlops(8*(a->nz*2 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
32849b5e25fSSatish Balay   PetscFunctionReturn(0);
32949b5e25fSSatish Balay }
33049b5e25fSSatish Balay 
3314a2ae208SSatish Balay #undef __FUNCT__
3324a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
333dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
33449b5e25fSSatish Balay {
33549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
33687828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
33749b5e25fSSatish Balay   MatScalar      *v;
3386849ba73SBarry Smith   PetscErrorCode ierr;
33913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
34149b5e25fSSatish Balay 
34249b5e25fSSatish Balay   PetscFunctionBegin;
3432dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3441ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3451ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34649b5e25fSSatish Balay 
34749b5e25fSSatish Balay   v    = a->a;
34849b5e25fSSatish Balay   xb   = x;
34949b5e25fSSatish Balay 
35049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
35149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
35249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
35349b5e25fSSatish Balay     ib = aj + *ai;
354831a3094SHong Zhang     jmin = 0;
35598c9bda7SSatish Balay     nonzerorow += (n>0);
3567fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
35749b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
35849b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
35949b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
360831a3094SHong Zhang       v += 9; jmin++;
3617fbae186SHong Zhang     }
362831a3094SHong Zhang     for (j=jmin; j<n; j++) {
36349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
36449b5e25fSSatish Balay       cval       = ib[j]*3;
36549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
36649b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
36749b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
36849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
36949b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
37049b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
37149b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
37249b5e25fSSatish Balay       v  += 9;
37349b5e25fSSatish Balay     }
37449b5e25fSSatish Balay     xb +=3; ai++;
37549b5e25fSSatish Balay   }
37649b5e25fSSatish Balay 
3771ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3781ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
37998c9bda7SSatish Balay   ierr = PetscLogFlops(18*(a->nz*2 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
38049b5e25fSSatish Balay   PetscFunctionReturn(0);
38149b5e25fSSatish Balay }
38249b5e25fSSatish Balay 
3834a2ae208SSatish Balay #undef __FUNCT__
3844a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
385dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
38649b5e25fSSatish Balay {
38749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38887828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
38949b5e25fSSatish Balay   MatScalar      *v;
3906849ba73SBarry Smith   PetscErrorCode ierr;
39113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
39298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
39349b5e25fSSatish Balay 
39449b5e25fSSatish Balay   PetscFunctionBegin;
3952dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3961ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3971ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
39849b5e25fSSatish Balay 
39949b5e25fSSatish Balay   v     = a->a;
40049b5e25fSSatish Balay   xb = x;
40149b5e25fSSatish Balay 
40249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
40349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
40449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
40549b5e25fSSatish Balay     ib = aj + *ai;
406831a3094SHong Zhang     jmin = 0;
40798c9bda7SSatish Balay     nonzerorow += (n>0);
4087fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
40949b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
41049b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
41149b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
41249b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
413831a3094SHong Zhang       v += 16; jmin++;
4147fbae186SHong Zhang     }
415831a3094SHong Zhang     for (j=jmin; j<n; j++) {
41649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
41749b5e25fSSatish Balay       cval       = ib[j]*4;
41849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
41949b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
42049b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
42149b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
42249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
42349b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
42449b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
42549b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
42649b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
42749b5e25fSSatish Balay       v  += 16;
42849b5e25fSSatish Balay     }
42949b5e25fSSatish Balay     xb +=4; ai++;
43049b5e25fSSatish Balay   }
43149b5e25fSSatish Balay 
4321ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4331ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
43498c9bda7SSatish Balay   ierr = PetscLogFlops(32*(a->nz*2 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
43549b5e25fSSatish Balay   PetscFunctionReturn(0);
43649b5e25fSSatish Balay }
43749b5e25fSSatish Balay 
4384a2ae208SSatish Balay #undef __FUNCT__
4394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
440dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
44149b5e25fSSatish Balay {
44249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
44387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
44449b5e25fSSatish Balay   MatScalar      *v;
4456849ba73SBarry Smith   PetscErrorCode ierr;
44613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
44798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
44849b5e25fSSatish Balay 
44949b5e25fSSatish Balay   PetscFunctionBegin;
4502dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4511ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4521ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
45349b5e25fSSatish Balay 
45449b5e25fSSatish Balay   v     = a->a;
45549b5e25fSSatish Balay   xb = x;
45649b5e25fSSatish Balay 
45749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
45849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
45949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
46049b5e25fSSatish Balay     ib = aj + *ai;
461831a3094SHong Zhang     jmin = 0;
46298c9bda7SSatish Balay     nonzerorow += (n>0);
4637fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
46449b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
46549b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
46649b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
46749b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
46849b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
469831a3094SHong Zhang       v += 25; jmin++;
4707fbae186SHong Zhang     }
471831a3094SHong Zhang     for (j=jmin; j<n; j++) {
47249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
47349b5e25fSSatish Balay       cval       = ib[j]*5;
47449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
47549b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
47649b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
47749b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
47849b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
47949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
48049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
48149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
48249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
48349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
48449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
48549b5e25fSSatish Balay       v  += 25;
48649b5e25fSSatish Balay     }
48749b5e25fSSatish Balay     xb +=5; ai++;
48849b5e25fSSatish Balay   }
48949b5e25fSSatish Balay 
4901ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4911ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
49298c9bda7SSatish Balay   ierr = PetscLogFlops(50*(a->nz*2 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
49349b5e25fSSatish Balay   PetscFunctionReturn(0);
49449b5e25fSSatish Balay }
49549b5e25fSSatish Balay 
49649b5e25fSSatish Balay 
4974a2ae208SSatish Balay #undef __FUNCT__
4984a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
499dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
50049b5e25fSSatish Balay {
50149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
50287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
50349b5e25fSSatish Balay   MatScalar      *v;
5046849ba73SBarry Smith   PetscErrorCode ierr;
50513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
50698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
50749b5e25fSSatish Balay 
50849b5e25fSSatish Balay   PetscFunctionBegin;
5092dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5101ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5111ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
51249b5e25fSSatish Balay 
51349b5e25fSSatish Balay   v     = a->a;
51449b5e25fSSatish Balay   xb = x;
51549b5e25fSSatish Balay 
51649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
51749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
51849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
51949b5e25fSSatish Balay     ib = aj + *ai;
520831a3094SHong Zhang     jmin = 0;
52198c9bda7SSatish Balay     nonzerorow += (n>0);
5227fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
52349b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
52449b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
52549b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
52649b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
52749b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
52849b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
529831a3094SHong Zhang       v += 36; jmin++;
5307fbae186SHong Zhang     }
531831a3094SHong Zhang     for (j=jmin; j<n; j++) {
53249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
53349b5e25fSSatish Balay       cval       = ib[j]*6;
53449b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
53549b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
53649b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
53749b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
53849b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
53949b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
54049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
54149b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
54249b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
54349b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
54449b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
54549b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
54649b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
54749b5e25fSSatish Balay       v  += 36;
54849b5e25fSSatish Balay     }
54949b5e25fSSatish Balay     xb +=6; ai++;
55049b5e25fSSatish Balay   }
55149b5e25fSSatish Balay 
5521ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5531ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
55498c9bda7SSatish Balay   ierr = PetscLogFlops(72*(a->nz*2 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
55549b5e25fSSatish Balay   PetscFunctionReturn(0);
55649b5e25fSSatish Balay }
5574a2ae208SSatish Balay #undef __FUNCT__
5584a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
559dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
56049b5e25fSSatish Balay {
56149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
56287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
56349b5e25fSSatish Balay   MatScalar      *v;
5646849ba73SBarry Smith   PetscErrorCode ierr;
56513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
56698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
56749b5e25fSSatish Balay 
56849b5e25fSSatish Balay   PetscFunctionBegin;
5692dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5701ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
57249b5e25fSSatish Balay 
57349b5e25fSSatish Balay   v     = a->a;
57449b5e25fSSatish Balay   xb = x;
57549b5e25fSSatish Balay 
57649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
57749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
57849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
57949b5e25fSSatish Balay     ib = aj + *ai;
580831a3094SHong Zhang     jmin = 0;
58198c9bda7SSatish Balay     nonzerorow += (n>0);
5827fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
58349b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
58449b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
58549b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
58649b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
58749b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
58849b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
58949b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
590831a3094SHong Zhang       v += 49; jmin++;
5917fbae186SHong Zhang     }
592831a3094SHong Zhang     for (j=jmin; j<n; j++) {
59349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
59449b5e25fSSatish Balay       cval       = ib[j]*7;
59549b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
59649b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
59749b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
59849b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
59949b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
60049b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
60149b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
60249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
60349b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
60449b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
60549b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
60649b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
60749b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
60849b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
60949b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
61049b5e25fSSatish Balay       v  += 49;
61149b5e25fSSatish Balay     }
61249b5e25fSSatish Balay     xb +=7; ai++;
61349b5e25fSSatish Balay   }
6141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6151ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
61698c9bda7SSatish Balay   ierr = PetscLogFlops(98*(a->nz*2 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
61749b5e25fSSatish Balay   PetscFunctionReturn(0);
61849b5e25fSSatish Balay }
61949b5e25fSSatish Balay 
62049b5e25fSSatish Balay /*
62149b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
62249b5e25fSSatish Balay */
6234a2ae208SSatish Balay #undef __FUNCT__
6244a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
625dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
62649b5e25fSSatish Balay {
62749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
62887828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6290b60a74dSHong Zhang   MatScalar      *v;
630dfbe8321SBarry Smith   PetscErrorCode ierr;
631*d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
63298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
63349b5e25fSSatish Balay 
63449b5e25fSSatish Balay   PetscFunctionBegin;
6352dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6361ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6371ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
63849b5e25fSSatish Balay 
63949b5e25fSSatish Balay   aj   = a->j;
64049b5e25fSSatish Balay   v    = a->a;
64149b5e25fSSatish Balay   ii   = a->i;
64249b5e25fSSatish Balay 
64349b5e25fSSatish Balay   if (!a->mult_work) {
644*d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
64549b5e25fSSatish Balay   }
64649b5e25fSSatish Balay   work = a->mult_work;
64749b5e25fSSatish Balay 
64849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
64949b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
65049b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
65198c9bda7SSatish Balay     nonzerorow += (n>0);
65249b5e25fSSatish Balay 
65349b5e25fSSatish Balay     /* upper triangular part */
65449b5e25fSSatish Balay     for (j=0; j<n; j++) {
65549b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
65649b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
65749b5e25fSSatish Balay       workt += bs;
65849b5e25fSSatish Balay     }
65949b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
66049b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
66149b5e25fSSatish Balay 
66249b5e25fSSatish Balay     /* strict lower triangular part */
663831a3094SHong Zhang     idx = aj+ii[0];
664831a3094SHong Zhang     if (*idx == i){
66596b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
666831a3094SHong Zhang     }
66796b9376eSHong Zhang 
66849b5e25fSSatish Balay     if (ncols > 0){
66949b5e25fSSatish Balay       workt = work;
67087828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
671831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
672831a3094SHong Zhang       for (j=0; j<n; j++) {
673831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
67449b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
67549b5e25fSSatish Balay         workt += bs;
67649b5e25fSSatish Balay       }
67749b5e25fSSatish Balay     }
67849b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
67949b5e25fSSatish Balay   }
68049b5e25fSSatish Balay 
6811ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6821ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68398c9bda7SSatish Balay   ierr = PetscLogFlops(2*(a->nz*2 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
68449b5e25fSSatish Balay   PetscFunctionReturn(0);
68549b5e25fSSatish Balay }
68649b5e25fSSatish Balay 
6876a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6884a2ae208SSatish Balay #undef __FUNCT__
6894a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
690dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
69149b5e25fSSatish Balay {
69249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
693bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
69449b5e25fSSatish Balay   MatScalar      *v;
6956849ba73SBarry Smith   PetscErrorCode ierr;
69613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
69798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
69849b5e25fSSatish Balay 
69949b5e25fSSatish Balay   PetscFunctionBegin;
7006a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7011ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7021ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70349b5e25fSSatish Balay   v  = a->a;
70449b5e25fSSatish Balay   xb = x;
70549b5e25fSSatish Balay 
70649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70749b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
70849b5e25fSSatish Balay     x1 = xb[0];
70949b5e25fSSatish Balay     ib = aj + *ai;
710831a3094SHong Zhang     jmin = 0;
71198c9bda7SSatish Balay     nonzerorow += (n>0);
712831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
713831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
714831a3094SHong Zhang     }
715831a3094SHong Zhang     for (j=jmin; j<n; j++) {
71649b5e25fSSatish Balay       cval    = *ib;
71749b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
71849b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
71949b5e25fSSatish Balay     }
72049b5e25fSSatish Balay     xb++; ai++;
72149b5e25fSSatish Balay   }
72249b5e25fSSatish Balay 
7231ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
724bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
72549b5e25fSSatish Balay 
72698c9bda7SSatish Balay   ierr = PetscLogFlops(2*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
72749b5e25fSSatish Balay   PetscFunctionReturn(0);
72849b5e25fSSatish Balay }
72949b5e25fSSatish Balay 
7304a2ae208SSatish Balay #undef __FUNCT__
7314a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
732dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
73349b5e25fSSatish Balay {
73449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
735bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
73649b5e25fSSatish Balay   MatScalar      *v;
7376849ba73SBarry Smith   PetscErrorCode ierr;
73813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
73998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
74049b5e25fSSatish Balay 
74149b5e25fSSatish Balay   PetscFunctionBegin;
7426a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7431ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7441ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
74549b5e25fSSatish Balay 
74649b5e25fSSatish Balay   v  = a->a;
74749b5e25fSSatish Balay   xb = x;
74849b5e25fSSatish Balay 
74949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
75049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
75149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
75249b5e25fSSatish Balay     ib = aj + *ai;
753831a3094SHong Zhang     jmin = 0;
75498c9bda7SSatish Balay     nonzerorow += (n>0);
7557fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
75649b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
75749b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
758831a3094SHong Zhang       v += 4; jmin++;
7597fbae186SHong Zhang     }
760831a3094SHong Zhang     for (j=jmin; j<n; j++) {
76149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
76249b5e25fSSatish Balay       cval       = ib[j]*2;
76349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
76449b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
76549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
76649b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
76749b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
76849b5e25fSSatish Balay       v  += 4;
76949b5e25fSSatish Balay     }
77049b5e25fSSatish Balay     xb +=2; ai++;
77149b5e25fSSatish Balay   }
7721ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
773bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
77449b5e25fSSatish Balay 
77598c9bda7SSatish Balay   ierr = PetscLogFlops(4*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
77649b5e25fSSatish Balay   PetscFunctionReturn(0);
77749b5e25fSSatish Balay }
77849b5e25fSSatish Balay 
7794a2ae208SSatish Balay #undef __FUNCT__
7804a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
781dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
78249b5e25fSSatish Balay {
78349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
784bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
78549b5e25fSSatish Balay   MatScalar      *v;
7866849ba73SBarry Smith   PetscErrorCode ierr;
78713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
78898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
78949b5e25fSSatish Balay 
79049b5e25fSSatish Balay   PetscFunctionBegin;
7916a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
79449b5e25fSSatish Balay 
79549b5e25fSSatish Balay   v     = a->a;
79649b5e25fSSatish Balay   xb = x;
79749b5e25fSSatish Balay 
79849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
79949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
80049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
80149b5e25fSSatish Balay     ib = aj + *ai;
802831a3094SHong Zhang     jmin = 0;
80398c9bda7SSatish Balay     nonzerorow += (n>0);
8047fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
80549b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
80649b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
80749b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
808831a3094SHong Zhang      v += 9; jmin++;
8097fbae186SHong Zhang     }
810831a3094SHong Zhang     for (j=jmin; j<n; j++) {
81149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
81249b5e25fSSatish Balay       cval       = ib[j]*3;
81349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
81449b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
81549b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
81649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
81749b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
81849b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
81949b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
82049b5e25fSSatish Balay       v  += 9;
82149b5e25fSSatish Balay     }
82249b5e25fSSatish Balay     xb +=3; ai++;
82349b5e25fSSatish Balay   }
82449b5e25fSSatish Balay 
8251ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
826bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
82749b5e25fSSatish Balay 
82898c9bda7SSatish Balay   ierr = PetscLogFlops(18*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
82949b5e25fSSatish Balay   PetscFunctionReturn(0);
83049b5e25fSSatish Balay }
83149b5e25fSSatish Balay 
8324a2ae208SSatish Balay #undef __FUNCT__
8334a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
834dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
83549b5e25fSSatish Balay {
83649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
837bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
83849b5e25fSSatish Balay   MatScalar      *v;
8396849ba73SBarry Smith   PetscErrorCode ierr;
84013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
84198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
84249b5e25fSSatish Balay 
84349b5e25fSSatish Balay   PetscFunctionBegin;
8446a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8451ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8461ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
84749b5e25fSSatish Balay 
84849b5e25fSSatish Balay   v     = a->a;
84949b5e25fSSatish Balay   xb = x;
85049b5e25fSSatish Balay 
85149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
85249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
85349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
85449b5e25fSSatish Balay     ib = aj + *ai;
855831a3094SHong Zhang     jmin = 0;
85698c9bda7SSatish Balay     nonzerorow += (n>0);
8577fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
85849b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
85949b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
86049b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
86149b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
862831a3094SHong Zhang       v += 16; jmin++;
8637fbae186SHong Zhang     }
864831a3094SHong Zhang     for (j=jmin; j<n; j++) {
86549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
86649b5e25fSSatish Balay       cval       = ib[j]*4;
86749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
86849b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
86949b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
87049b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
87149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
87249b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
87349b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
87449b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
87549b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
87649b5e25fSSatish Balay       v  += 16;
87749b5e25fSSatish Balay     }
87849b5e25fSSatish Balay     xb +=4; ai++;
87949b5e25fSSatish Balay   }
88049b5e25fSSatish Balay 
8811ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
88349b5e25fSSatish Balay 
88498c9bda7SSatish Balay   ierr = PetscLogFlops(32*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
88549b5e25fSSatish Balay   PetscFunctionReturn(0);
88649b5e25fSSatish Balay }
88749b5e25fSSatish Balay 
8884a2ae208SSatish Balay #undef __FUNCT__
8894a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
890dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
89149b5e25fSSatish Balay {
89249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
893bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
89449b5e25fSSatish Balay   MatScalar      *v;
8956849ba73SBarry Smith   PetscErrorCode ierr;
89613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
89798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
89849b5e25fSSatish Balay 
89949b5e25fSSatish Balay   PetscFunctionBegin;
9006a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9011ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9021ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
90349b5e25fSSatish Balay 
90449b5e25fSSatish Balay   v     = a->a;
90549b5e25fSSatish Balay   xb = x;
90649b5e25fSSatish Balay 
90749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
90849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
90949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
91049b5e25fSSatish Balay     ib = aj + *ai;
911831a3094SHong Zhang     jmin = 0;
91298c9bda7SSatish Balay     nonzerorow += (n>0);
9137fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
91449b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
91549b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
91649b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
91749b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
91849b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
919831a3094SHong Zhang       v += 25; jmin++;
9207fbae186SHong Zhang     }
921831a3094SHong Zhang     for (j=jmin; j<n; j++) {
92249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
92349b5e25fSSatish Balay       cval       = ib[j]*5;
92449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
92549b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
92649b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
92749b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
92849b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
92949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
93049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
93149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
93249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
93349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
93449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
93549b5e25fSSatish Balay       v  += 25;
93649b5e25fSSatish Balay     }
93749b5e25fSSatish Balay     xb +=5; ai++;
93849b5e25fSSatish Balay   }
93949b5e25fSSatish Balay 
9401ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
941bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
94249b5e25fSSatish Balay 
94398c9bda7SSatish Balay   ierr = PetscLogFlops(50*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
94449b5e25fSSatish Balay   PetscFunctionReturn(0);
94549b5e25fSSatish Balay }
9464a2ae208SSatish Balay #undef __FUNCT__
9474a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
948dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
94949b5e25fSSatish Balay {
95049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
951bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
95249b5e25fSSatish Balay   MatScalar      *v;
9536849ba73SBarry Smith   PetscErrorCode ierr;
95413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
95598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
95649b5e25fSSatish Balay 
95749b5e25fSSatish Balay   PetscFunctionBegin;
9586a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9591ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9601ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
96149b5e25fSSatish Balay 
96249b5e25fSSatish Balay   v     = a->a;
96349b5e25fSSatish Balay   xb = x;
96449b5e25fSSatish Balay 
96549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
96649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
96749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
96849b5e25fSSatish Balay     ib = aj + *ai;
969831a3094SHong Zhang     jmin = 0;
97098c9bda7SSatish Balay     nonzerorow += (n>0);
9717fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
97249b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
97349b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
97449b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
97549b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
97649b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
97749b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
978831a3094SHong Zhang       v += 36; jmin++;
9797fbae186SHong Zhang     }
980831a3094SHong Zhang     for (j=jmin; j<n; j++) {
98149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
98249b5e25fSSatish Balay       cval       = ib[j]*6;
98349b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
98449b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
98549b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
98649b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
98749b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
98849b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
98949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
99049b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
99149b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
99249b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
99349b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
99449b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
99549b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
99649b5e25fSSatish Balay       v  += 36;
99749b5e25fSSatish Balay     }
99849b5e25fSSatish Balay     xb +=6; ai++;
99949b5e25fSSatish Balay   }
100049b5e25fSSatish Balay 
10011ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1002bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
100349b5e25fSSatish Balay 
100498c9bda7SSatish Balay   ierr = PetscLogFlops(72*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
100549b5e25fSSatish Balay   PetscFunctionReturn(0);
100649b5e25fSSatish Balay }
100749b5e25fSSatish Balay 
10084a2ae208SSatish Balay #undef __FUNCT__
10094a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
1010dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
101149b5e25fSSatish Balay {
101249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1013bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
101449b5e25fSSatish Balay   MatScalar      *v;
10156849ba73SBarry Smith   PetscErrorCode ierr;
101613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
101798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
101849b5e25fSSatish Balay 
101949b5e25fSSatish Balay   PetscFunctionBegin;
10206a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10211ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
10221ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
102349b5e25fSSatish Balay 
102449b5e25fSSatish Balay   v     = a->a;
102549b5e25fSSatish Balay   xb = x;
102649b5e25fSSatish Balay 
102749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
102849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
102949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
103049b5e25fSSatish Balay     ib = aj + *ai;
1031831a3094SHong Zhang     jmin = 0;
103298c9bda7SSatish Balay     nonzerorow += (n>0);
10337fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
103449b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
103549b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
103649b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
103749b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
103849b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
103949b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
104049b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1041831a3094SHong Zhang       v += 49; jmin++;
10427fbae186SHong Zhang     }
1043831a3094SHong Zhang     for (j=jmin; j<n; j++) {
104449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
104549b5e25fSSatish Balay       cval       = ib[j]*7;
104649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
104749b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
104849b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
104949b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
105049b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
105149b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
105249b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
105349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
105449b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
105549b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
105649b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
105749b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
105849b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
105949b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
106049b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
106149b5e25fSSatish Balay       v  += 49;
106249b5e25fSSatish Balay     }
106349b5e25fSSatish Balay     xb +=7; ai++;
106449b5e25fSSatish Balay   }
106549b5e25fSSatish Balay 
10661ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1067bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
106849b5e25fSSatish Balay 
106998c9bda7SSatish Balay   ierr = PetscLogFlops(98*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
107049b5e25fSSatish Balay   PetscFunctionReturn(0);
107149b5e25fSSatish Balay }
107249b5e25fSSatish Balay 
10734a2ae208SSatish Balay #undef __FUNCT__
10744a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1075dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
107649b5e25fSSatish Balay {
107749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1078bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1079066653e3SSatish Balay   MatScalar      *v;
1080dfbe8321SBarry Smith   PetscErrorCode ierr;
1081*d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
108298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
108349b5e25fSSatish Balay 
108449b5e25fSSatish Balay   PetscFunctionBegin;
10856a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10861ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
108849b5e25fSSatish Balay 
108949b5e25fSSatish Balay   aj   = a->j;
109049b5e25fSSatish Balay   v    = a->a;
109149b5e25fSSatish Balay   ii   = a->i;
109249b5e25fSSatish Balay 
109349b5e25fSSatish Balay   if (!a->mult_work) {
1094*d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
109549b5e25fSSatish Balay   }
109649b5e25fSSatish Balay   work = a->mult_work;
109749b5e25fSSatish Balay 
109849b5e25fSSatish Balay 
109949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
110049b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
110149b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
110298c9bda7SSatish Balay     nonzerorow += (n>0);
110349b5e25fSSatish Balay 
110449b5e25fSSatish Balay     /* upper triangular part */
110549b5e25fSSatish Balay     for (j=0; j<n; j++) {
110649b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
110749b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
110849b5e25fSSatish Balay       workt += bs;
110949b5e25fSSatish Balay     }
111049b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
111149b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
111249b5e25fSSatish Balay 
111349b5e25fSSatish Balay     /* strict lower triangular part */
1114831a3094SHong Zhang     idx = aj+ii[0];
1115831a3094SHong Zhang     if (*idx == i){
111696b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1117831a3094SHong Zhang     }
111849b5e25fSSatish Balay     if (ncols > 0){
111949b5e25fSSatish Balay       workt = work;
112087828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1121831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1122831a3094SHong Zhang       for (j=0; j<n; j++) {
1123831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
112449b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
112549b5e25fSSatish Balay         workt += bs;
112649b5e25fSSatish Balay       }
112749b5e25fSSatish Balay     }
112849b5e25fSSatish Balay 
112949b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
113049b5e25fSSatish Balay   }
113149b5e25fSSatish Balay 
11321ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1133bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
113449b5e25fSSatish Balay 
113598c9bda7SSatish Balay   ierr = PetscLogFlops(2*(a->nz*2 - nonzerorow));CHKERRQ(ierr);
113649b5e25fSSatish Balay   PetscFunctionReturn(0);
113749b5e25fSSatish Balay }
113849b5e25fSSatish Balay 
11394a2ae208SSatish Balay #undef __FUNCT__
11404a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1141f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
114249b5e25fSSatish Balay {
114349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1144f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1145efee365bSSatish Balay   PetscErrorCode ierr;
11460805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
114749b5e25fSSatish Balay 
114849b5e25fSSatish Balay   PetscFunctionBegin;
1149f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1150efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
115149b5e25fSSatish Balay   PetscFunctionReturn(0);
115249b5e25fSSatish Balay }
115349b5e25fSSatish Balay 
11544a2ae208SSatish Balay #undef __FUNCT__
11554a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1156dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
115749b5e25fSSatish Balay {
115849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
115949b5e25fSSatish Balay   MatScalar      *v = a->a;
116049b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1161*d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11626849ba73SBarry Smith   PetscErrorCode ierr;
116313f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
116449b5e25fSSatish Balay 
116549b5e25fSSatish Balay   PetscFunctionBegin;
116649b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
116749b5e25fSSatish Balay     for (k=0; k<mbs; k++){
116849b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1169831a3094SHong Zhang       col  = aj + jmin;
1170831a3094SHong Zhang       if (*col == k){         /* diagonal block */
117149b5e25fSSatish Balay         for (i=0; i<bs2; i++){
117249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
117349b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
117449b5e25fSSatish Balay #else
117549b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
117649b5e25fSSatish Balay #endif
117749b5e25fSSatish Balay         }
1178831a3094SHong Zhang         jmin++;
1179831a3094SHong Zhang       }
1180831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
118149b5e25fSSatish Balay         for (i=0; i<bs2; i++){
118249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
118349b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
118449b5e25fSSatish Balay #else
118549b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
118649b5e25fSSatish Balay #endif
118749b5e25fSSatish Balay         }
118849b5e25fSSatish Balay       }
118949b5e25fSSatish Balay     }
119049b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11910b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
11920b8dc8d2SHong Zhang     ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr);
11930b8dc8d2SHong Zhang     jl   = il + mbs;
11940b8dc8d2SHong Zhang     sum  = (PetscReal*)(jl + mbs);
11950b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11960b8dc8d2SHong Zhang     il[0] = 0;
119749b5e25fSSatish Balay 
119849b5e25fSSatish Balay     *norm = 0.0;
119949b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
120049b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
120149b5e25fSSatish Balay       /*-- col sum --*/
120249b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1203831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1204831a3094SHong Zhang                   at step k */
120549b5e25fSSatish Balay       while (i<mbs){
120649b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
120749b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
120849b5e25fSSatish Balay         for (j=0; j<bs; j++){
120949b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
121049b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
121149b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
121249b5e25fSSatish Balay           }
121349b5e25fSSatish Balay         }
121449b5e25fSSatish Balay         /* update il, jl */
1215831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1216831a3094SHong Zhang         jmax = a->i[i+1];
121749b5e25fSSatish Balay         if (jmin < jmax){
121849b5e25fSSatish Balay           il[i] = jmin;
121949b5e25fSSatish Balay           j   = a->j[jmin];
122049b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
122149b5e25fSSatish Balay         }
122249b5e25fSSatish Balay         i = nexti;
122349b5e25fSSatish Balay       }
122449b5e25fSSatish Balay       /*-- row sum --*/
122549b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
122649b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
122749b5e25fSSatish Balay         for (j=0; j<bs; j++){
122849b5e25fSSatish Balay           v = a->a + i*bs2 + j;
122949b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
12300b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
123149b5e25fSSatish Balay           }
123249b5e25fSSatish Balay         }
123349b5e25fSSatish Balay       }
123449b5e25fSSatish Balay       /* add k_th block row to il, jl */
1235831a3094SHong Zhang       col = aj+jmin;
1236831a3094SHong Zhang       if (*col == k) jmin++;
123749b5e25fSSatish Balay       if (jmin < jmax){
123849b5e25fSSatish Balay         il[k] = jmin;
12390b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
124049b5e25fSSatish Balay       }
124149b5e25fSSatish Balay       for (j=0; j<bs; j++){
124249b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
124349b5e25fSSatish Balay       }
124449b5e25fSSatish Balay     }
124549b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
124649b5e25fSSatish Balay   } else {
1247347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
124849b5e25fSSatish Balay   }
124949b5e25fSSatish Balay   PetscFunctionReturn(0);
125049b5e25fSSatish Balay }
125149b5e25fSSatish Balay 
12524a2ae208SSatish Balay #undef __FUNCT__
12534a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1254dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
125549b5e25fSSatish Balay {
125649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1257dfbe8321SBarry Smith   PetscErrorCode ierr;
125849b5e25fSSatish Balay 
125949b5e25fSSatish Balay   PetscFunctionBegin;
126049b5e25fSSatish Balay 
126149b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1262*d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1263ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1264ef511fbeSHong Zhang     PetscFunctionReturn(0);
126549b5e25fSSatish Balay   }
126649b5e25fSSatish Balay 
126749b5e25fSSatish Balay   /* if the a->i are the same */
126813f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1269abc0a331SBarry Smith   if (!*flg) {
127049b5e25fSSatish Balay     PetscFunctionReturn(0);
127149b5e25fSSatish Balay   }
127249b5e25fSSatish Balay 
127349b5e25fSSatish Balay   /* if a->j are the same */
127413f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1275abc0a331SBarry Smith   if (!*flg) {
127649b5e25fSSatish Balay     PetscFunctionReturn(0);
127749b5e25fSSatish Balay   }
127849b5e25fSSatish Balay   /* if a->a are the same */
1279*d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1280935af2e7SHong Zhang   PetscFunctionReturn(0);
128149b5e25fSSatish Balay }
128249b5e25fSSatish Balay 
12834a2ae208SSatish Balay #undef __FUNCT__
12844a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1285dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
128649b5e25fSSatish Balay {
128749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1288dfbe8321SBarry Smith   PetscErrorCode ierr;
128913f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
129087828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
129149b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
129249b5e25fSSatish Balay 
129349b5e25fSSatish Balay   PetscFunctionBegin;
1294*d0f46423SBarry Smith   bs   = A->rmap->bs;
129582799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
129682799104SHong Zhang 
129749b5e25fSSatish Balay   aa   = a->a;
129849b5e25fSSatish Balay   ai   = a->i;
129949b5e25fSSatish Balay   aj   = a->j;
130049b5e25fSSatish Balay   ambs = a->mbs;
130149b5e25fSSatish Balay   bs2  = a->bs2;
130249b5e25fSSatish Balay 
13032dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
13041ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
130549b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1306*d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
130749b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
130849b5e25fSSatish Balay     j=ai[i];
130949b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
131049b5e25fSSatish Balay       row  = i*bs;
131149b5e25fSSatish Balay       aa_j = aa + j*bs2;
131282799104SHong Zhang       if (A->factor && bs==1){
131382799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
131482799104SHong Zhang       } else {
131549b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
131649b5e25fSSatish Balay       }
131749b5e25fSSatish Balay     }
131882799104SHong Zhang   }
131982799104SHong Zhang 
13201ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
132149b5e25fSSatish Balay   PetscFunctionReturn(0);
132249b5e25fSSatish Balay }
132349b5e25fSSatish Balay 
13244a2ae208SSatish Balay #undef __FUNCT__
13254a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1326dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
132749b5e25fSSatish Balay {
132849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
13295e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
133049b5e25fSSatish Balay   MatScalar      *aa,*v;
1331dfbe8321SBarry Smith   PetscErrorCode ierr;
13325e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1333b3bf805bSHong Zhang   PetscTruth     flg;
133449b5e25fSSatish Balay 
133549b5e25fSSatish Balay   PetscFunctionBegin;
1336b3bf805bSHong Zhang   if (ll != rr){
1337b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1338b3bf805bSHong Zhang     if (!flg)
1339b3bf805bSHong Zhang       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1340b3bf805bSHong Zhang   }
1341b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
134249b5e25fSSatish Balay   ai  = a->i;
134349b5e25fSSatish Balay   aj  = a->j;
134449b5e25fSSatish Balay   aa  = a->a;
1345*d0f46423SBarry Smith   m   = A->rmap->N;
1346*d0f46423SBarry Smith   bs  = A->rmap->bs;
134749b5e25fSSatish Balay   mbs = a->mbs;
134849b5e25fSSatish Balay   bs2 = a->bs2;
134949b5e25fSSatish Balay 
13501ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
135149b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1352347d480fSBarry Smith   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
135349b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
135449b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
135549b5e25fSSatish Balay     li = l + i*bs;
135649b5e25fSSatish Balay     v  = aa + bs2*ai[i];
135749b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
135849b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13595e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
136049b5e25fSSatish Balay         x = ri[k];
136149b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
136249b5e25fSSatish Balay       }
136349b5e25fSSatish Balay     }
136449b5e25fSSatish Balay   }
13651ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1366efee365bSSatish Balay   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
136749b5e25fSSatish Balay   PetscFunctionReturn(0);
136849b5e25fSSatish Balay }
136949b5e25fSSatish Balay 
13704a2ae208SSatish Balay #undef __FUNCT__
13714a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1372dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
137349b5e25fSSatish Balay {
137449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137549b5e25fSSatish Balay 
137649b5e25fSSatish Balay   PetscFunctionBegin;
1377*d0f46423SBarry Smith   info->rows_global    = (double)A->rmap->N;
1378*d0f46423SBarry Smith   info->columns_global = (double)A->rmap->N;
1379*d0f46423SBarry Smith   info->rows_local     = (double)A->rmap->N;
1380*d0f46423SBarry Smith   info->columns_local  = (double)A->rmap->N;
138149b5e25fSSatish Balay   info->block_size     = a->bs2;
13826c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13836c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
138449b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
138549b5e25fSSatish Balay   info->assemblies   = A->num_ass;
138649b5e25fSSatish Balay   info->mallocs      = a->reallocs;
13877adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
138849b5e25fSSatish Balay   if (A->factor) {
138949b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
139049b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
139149b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
139249b5e25fSSatish Balay   } else {
139349b5e25fSSatish Balay     info->fill_ratio_given  = 0;
139449b5e25fSSatish Balay     info->fill_ratio_needed = 0;
139549b5e25fSSatish Balay     info->factor_mallocs    = 0;
139649b5e25fSSatish Balay   }
139749b5e25fSSatish Balay   PetscFunctionReturn(0);
139849b5e25fSSatish Balay }
139949b5e25fSSatish Balay 
140049b5e25fSSatish Balay 
14014a2ae208SSatish Balay #undef __FUNCT__
14024a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1403dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
140449b5e25fSSatish Balay {
140549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1406dfbe8321SBarry Smith   PetscErrorCode ierr;
140749b5e25fSSatish Balay 
140849b5e25fSSatish Balay   PetscFunctionBegin;
140949b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
141049b5e25fSSatish Balay   PetscFunctionReturn(0);
141149b5e25fSSatish Balay }
1412dc354874SHong Zhang 
14134a2ae208SSatish Balay #undef __FUNCT__
1414985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1415985db425SBarry Smith /*
1416985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1417985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1418985db425SBarry Smith */
1419985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1420dc354874SHong Zhang {
1421dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1422dfbe8321SBarry Smith   PetscErrorCode ierr;
142313f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1424c3fca9a7SHong Zhang   PetscReal      atmp;
1425273d9f13SBarry Smith   MatScalar      *aa;
1426985db425SBarry Smith   PetscScalar    *x;
142713f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1428dc354874SHong Zhang 
1429dc354874SHong Zhang   PetscFunctionBegin;
1430985db425SBarry Smith   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1431dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1432*d0f46423SBarry Smith   bs   = A->rmap->bs;
1433dc354874SHong Zhang   aa   = a->a;
1434dc354874SHong Zhang   ai   = a->i;
1435dc354874SHong Zhang   aj   = a->j;
143644117c81SHong Zhang   mbs = a->mbs;
1437dc354874SHong Zhang 
1438985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14391ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1440dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1441*d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
144244117c81SHong Zhang   for (i=0; i<mbs; i++) {
1443d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1444d0f6400bSHong Zhang     brow  = bs*i;
144544117c81SHong Zhang     for (j=0; j<ncols; j++){
1446d0f6400bSHong Zhang       bcol = bs*(*aj);
144744117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1448d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
144944117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1450d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1451d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1452c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1453c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
145444117c81SHong Zhang         }
145544117c81SHong Zhang       }
1456d0f6400bSHong Zhang       aj++;
1457dc354874SHong Zhang     }
1458dc354874SHong Zhang   }
14591ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1460dc354874SHong Zhang   PetscFunctionReturn(0);
1461dc354874SHong Zhang }
1462