xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 1ebc52fb1af8d1010ec2d5787b14564139be722e)
173f4d377SMatthew Knepley /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
449b5e25fSSatish Balay #include "src/inline/spops.h"
549b5e25fSSatish Balay #include "src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
11268466fbSBarry Smith int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS is[],int ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
145333f59eSHong Zhang   int          brow,i,j,k,l,mbs,n,*idx,ierr,*nidx,isz,bcol,
15d94109b8SHong Zhang                start,end,*ai,*aj,bs,*nidx2;
16d94109b8SHong Zhang   PetscBT      table;
17d94109b8SHong Zhang   PetscBT      table0;
18d94109b8SHong Zhang 
19d94109b8SHong Zhang   PetscFunctionBegin;
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23d94109b8SHong Zhang   bs  = a->bs;
24d94109b8SHong Zhang 
25d94109b8SHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
26d94109b8SHong Zhang 
27d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
28d94109b8SHong Zhang   ierr = PetscMalloc((mbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
29d94109b8SHong Zhang   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
30d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
31d94109b8SHong Zhang 
32d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
33d94109b8SHong Zhang     isz  = 0;
34d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
35d94109b8SHong Zhang 
36d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
37d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
38d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
39d94109b8SHong Zhang 
40d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
41d94109b8SHong Zhang     for (j=0; j<n ; ++j){
42d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
43d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
44d94109b8SHong Zhang       if(!PetscBTLookupSet(table,brow)) { nidx[isz++] = brow;}
45d94109b8SHong Zhang     }
46d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
47d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
48d94109b8SHong Zhang 
49d94109b8SHong Zhang     k = 0;
50d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
51d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
52d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
53d94109b8SHong Zhang       for (l=k; l<isz; l++) PetscBTSet(table0,nidx[l]);
54d94109b8SHong Zhang 
55d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
56d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
57d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
58d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
59d94109b8SHong Zhang           for (l = start; l<end ; l++){
60d94109b8SHong Zhang             bcol = aj[l];
61d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
62d94109b8SHong Zhang           }
63d94109b8SHong Zhang           k++;
64d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
65d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
66d94109b8SHong Zhang           for (l = start; l<end ; l++){
67d94109b8SHong Zhang             bcol = aj[l];
68d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
69d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
70d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
71d94109b8SHong Zhang             }
72d94109b8SHong Zhang           }
73d94109b8SHong Zhang         }
74d94109b8SHong Zhang       }
75d94109b8SHong Zhang     } /* for each overlap */
76d94109b8SHong Zhang 
77d94109b8SHong Zhang     /* expand the Index Set */
78d94109b8SHong Zhang     for (j=0; j<isz; j++) {
79d94109b8SHong Zhang       for (k=0; k<bs; k++)
80d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
81d94109b8SHong Zhang     }
82d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
83d94109b8SHong Zhang   }
84d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
85d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
86d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
87d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
885eee224dSHong Zhang   PetscFunctionReturn(0);
8949b5e25fSSatish Balay }
9049b5e25fSSatish Balay 
914a2ae208SSatish Balay #undef __FUNCT__
924a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
9349b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
9449b5e25fSSatish Balay {
9549b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
9649b5e25fSSatish Balay   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
9749b5e25fSSatish Balay   int          row,mat_i,*mat_j,tcol,*mat_ilen;
9849b5e25fSSatish Balay   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
9949b5e25fSSatish Balay   int          *aj = a->j,*ai = a->i;
10049b5e25fSSatish Balay   MatScalar    *mat_a;
10149b5e25fSSatish Balay   Mat          C;
10249b5e25fSSatish Balay   PetscTruth   flag;
10349b5e25fSSatish Balay 
10449b5e25fSSatish Balay   PetscFunctionBegin;
10549b5e25fSSatish Balay 
106ac355199SBarry Smith   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
10749b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
108347d480fSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
10949b5e25fSSatish Balay 
11049b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11149b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11249b5e25fSSatish Balay 
113b0a32e0cSBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
11449b5e25fSSatish Balay   ssmap = smap;
115b0a32e0cSBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
11649b5e25fSSatish Balay   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
11749b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
11849b5e25fSSatish Balay   /* determine lens of each row */
11949b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12049b5e25fSSatish Balay     kstart  = ai[irow[i]];
12149b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12249b5e25fSSatish Balay     lens[i] = 0;
12349b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12449b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12549b5e25fSSatish Balay           lens[i]++;
12649b5e25fSSatish Balay         }
12749b5e25fSSatish Balay       }
12849b5e25fSSatish Balay     }
12949b5e25fSSatish Balay   /* Create and fill new matrix */
13049b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13149b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13249b5e25fSSatish Balay 
133347d480fSBarry Smith     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13449b5e25fSSatish Balay     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
13549b5e25fSSatish Balay     if (flag == PETSC_FALSE) {
136347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13749b5e25fSSatish Balay     }
13849b5e25fSSatish Balay     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
13949b5e25fSSatish Balay     C = *B;
14049b5e25fSSatish Balay   } else {
141e2d9671bSKris Buschelman     ierr = MatCreate(A->comm,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr);
142e2d9671bSKris Buschelman     ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
143e2d9671bSKris Buschelman     ierr = MatSeqSBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr);
14449b5e25fSSatish Balay   }
14549b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14649b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
14749b5e25fSSatish Balay     row    = irow[i];
14849b5e25fSSatish Balay     kstart = ai[row];
14949b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15049b5e25fSSatish Balay     mat_i  = c->i[i];
15149b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15249b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15349b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15449b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15549b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15649b5e25fSSatish Balay         *mat_j++ = tcol - 1;
15749b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
15849b5e25fSSatish Balay         mat_a   += bs2;
15949b5e25fSSatish Balay         (*mat_ilen)++;
16049b5e25fSSatish Balay       }
16149b5e25fSSatish Balay     }
16249b5e25fSSatish Balay   }
16349b5e25fSSatish Balay 
16449b5e25fSSatish Balay   /* Free work space */
16549b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16649b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
16749b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16849b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16949b5e25fSSatish Balay 
17049b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17149b5e25fSSatish Balay   *B = C;
17249b5e25fSSatish Balay   PetscFunctionReturn(0);
17349b5e25fSSatish Balay }
17449b5e25fSSatish Balay 
1754a2ae208SSatish Balay #undef __FUNCT__
1764a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
17749b5e25fSSatish Balay int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
17849b5e25fSSatish Balay {
17949b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
18049b5e25fSSatish Balay   IS          is1;
18149b5e25fSSatish Balay   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;
18249b5e25fSSatish Balay 
18349b5e25fSSatish Balay   PetscFunctionBegin;
184ac355199SBarry Smith   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
18549b5e25fSSatish Balay 
18649b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
18749b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
18849b5e25fSSatish Balay 
18949b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19049b5e25fSSatish Balay    and form the IS with compressed IS */
19182502324SSatish Balay   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
19249b5e25fSSatish Balay   iary = vary + a->mbs;
19349b5e25fSSatish Balay   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
19449b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19549b5e25fSSatish Balay 
19649b5e25fSSatish Balay   count = 0;
19749b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
198ac355199SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
19949b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20049b5e25fSSatish Balay   }
20149b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20249b5e25fSSatish Balay 
20349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20449b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
20549b5e25fSSatish Balay 
20649b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
20749b5e25fSSatish Balay   ISDestroy(is1);
20849b5e25fSSatish Balay   PetscFunctionReturn(0);
20949b5e25fSSatish Balay }
21049b5e25fSSatish Balay 
2114a2ae208SSatish Balay #undef __FUNCT__
2124a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
213268466fbSBarry Smith int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21449b5e25fSSatish Balay {
21549b5e25fSSatish Balay   int ierr,i;
21649b5e25fSSatish Balay 
21749b5e25fSSatish Balay   PetscFunctionBegin;
21849b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
21982502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22049b5e25fSSatish Balay   }
22149b5e25fSSatish Balay 
22249b5e25fSSatish Balay   for (i=0; i<n; i++) {
22349b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
22449b5e25fSSatish Balay   }
22549b5e25fSSatish Balay   PetscFunctionReturn(0);
22649b5e25fSSatish Balay }
22749b5e25fSSatish Balay 
22849b5e25fSSatish Balay /* -------------------------------------------------------*/
22949b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23049b5e25fSSatish Balay /* -------------------------------------------------------*/
231d9eff348SSatish Balay #include "petscblaslapack.h"
23249b5e25fSSatish Balay 
2334a2ae208SSatish Balay #undef __FUNCT__
2344a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
23549b5e25fSSatish Balay int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
23649b5e25fSSatish Balay {
23749b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
23887828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,zero=0.0;
23949b5e25fSSatish Balay   MatScalar       *v;
240831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
24149b5e25fSSatish Balay 
24249b5e25fSSatish Balay   PetscFunctionBegin;
24349b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
244*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
245*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
24649b5e25fSSatish Balay 
24749b5e25fSSatish Balay   v  = a->a;
24849b5e25fSSatish Balay   xb = x;
24949b5e25fSSatish Balay 
25049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25149b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
25249b5e25fSSatish Balay     x1 = xb[0];
25349b5e25fSSatish Balay     ib = aj + *ai;
254831a3094SHong Zhang     jmin = 0;
255831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
256831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
257831a3094SHong Zhang       jmin++;
258831a3094SHong Zhang     }
259831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26049b5e25fSSatish Balay       cval    = *ib;
26149b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
26249b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
26349b5e25fSSatish Balay     }
26449b5e25fSSatish Balay     xb++; ai++;
26549b5e25fSSatish Balay   }
26649b5e25fSSatish Balay 
267*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
268*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
2696c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m) - A->m);  /* nz = (nz+m)/2 */
27049b5e25fSSatish Balay   PetscFunctionReturn(0);
27149b5e25fSSatish Balay }
27249b5e25fSSatish Balay 
2734a2ae208SSatish Balay #undef __FUNCT__
2744a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
27549b5e25fSSatish Balay int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
27649b5e25fSSatish Balay {
27749b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
27887828ca2SBarry Smith   PetscScalar      *x,*z,*xb,x1,x2,zero=0.0;
27949b5e25fSSatish Balay   MatScalar        *v;
280831a3094SHong Zhang   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
28149b5e25fSSatish Balay 
28249b5e25fSSatish Balay 
28349b5e25fSSatish Balay   PetscFunctionBegin;
28449b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
285*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
286*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
28749b5e25fSSatish Balay 
28849b5e25fSSatish Balay   v     = a->a;
28949b5e25fSSatish Balay   xb = x;
29049b5e25fSSatish Balay 
29149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
29249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
29349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
29449b5e25fSSatish Balay     ib = aj + *ai;
295831a3094SHong Zhang     jmin = 0;
2967fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
29749b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
29849b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
299831a3094SHong Zhang       v += 4; jmin++;
3007fbae186SHong Zhang     }
301831a3094SHong Zhang     for (j=jmin; j<n; j++) {
30249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
30349b5e25fSSatish Balay       cval       = ib[j]*2;
30449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
30549b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
30649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
30749b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
30849b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
30949b5e25fSSatish Balay       v  += 4;
31049b5e25fSSatish Balay     }
31149b5e25fSSatish Balay     xb +=2; ai++;
31249b5e25fSSatish Balay   }
31349b5e25fSSatish Balay 
314*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
315*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
3166c6c5352SBarry Smith   PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
31749b5e25fSSatish Balay   PetscFunctionReturn(0);
31849b5e25fSSatish Balay }
31949b5e25fSSatish Balay 
3204a2ae208SSatish Balay #undef __FUNCT__
3214a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
32249b5e25fSSatish Balay int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
32349b5e25fSSatish Balay {
32449b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
32587828ca2SBarry Smith   PetscScalar   *x,*z,*xb,x1,x2,x3,zero=0.0;
32649b5e25fSSatish Balay   MatScalar     *v;
327831a3094SHong Zhang   int           mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
32849b5e25fSSatish Balay 
32949b5e25fSSatish Balay 
33049b5e25fSSatish Balay   PetscFunctionBegin;
33149b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
332*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
333*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
33449b5e25fSSatish Balay 
33549b5e25fSSatish Balay   v     = a->a;
33649b5e25fSSatish Balay   xb = x;
33749b5e25fSSatish Balay 
33849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
33949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
34049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
34149b5e25fSSatish Balay     ib = aj + *ai;
342831a3094SHong Zhang     jmin = 0;
3437fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
34449b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
34549b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
34649b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
347831a3094SHong Zhang       v += 9; jmin++;
3487fbae186SHong Zhang     }
349831a3094SHong Zhang     for (j=jmin; j<n; j++) {
35049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
35149b5e25fSSatish Balay       cval       = ib[j]*3;
35249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
35349b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
35449b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
35549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
35649b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
35749b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
35849b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
35949b5e25fSSatish Balay       v  += 9;
36049b5e25fSSatish Balay     }
36149b5e25fSSatish Balay     xb +=3; ai++;
36249b5e25fSSatish Balay   }
36349b5e25fSSatish Balay 
364*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
365*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
3666c6c5352SBarry Smith   PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
36749b5e25fSSatish Balay   PetscFunctionReturn(0);
36849b5e25fSSatish Balay }
36949b5e25fSSatish Balay 
3704a2ae208SSatish Balay #undef __FUNCT__
3714a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
37249b5e25fSSatish Balay int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
37349b5e25fSSatish Balay {
37449b5e25fSSatish Balay   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
37587828ca2SBarry Smith   PetscScalar      *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
37649b5e25fSSatish Balay   MatScalar        *v;
377831a3094SHong Zhang   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
37849b5e25fSSatish Balay 
37949b5e25fSSatish Balay   PetscFunctionBegin;
38049b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
381*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
382*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
38349b5e25fSSatish Balay 
38449b5e25fSSatish Balay   v     = a->a;
38549b5e25fSSatish Balay   xb = x;
38649b5e25fSSatish Balay 
38749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
38849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
38949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
39049b5e25fSSatish Balay     ib = aj + *ai;
391831a3094SHong Zhang     jmin = 0;
3927fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
39349b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
39449b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
39549b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
39649b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
397831a3094SHong Zhang       v += 16; jmin++;
3987fbae186SHong Zhang     }
399831a3094SHong Zhang     for (j=jmin; j<n; j++) {
40049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
40149b5e25fSSatish Balay       cval       = ib[j]*4;
40249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
40349b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
40449b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
40549b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
40649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
40749b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
40849b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
40949b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
41049b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
41149b5e25fSSatish Balay       v  += 16;
41249b5e25fSSatish Balay     }
41349b5e25fSSatish Balay     xb +=4; ai++;
41449b5e25fSSatish Balay   }
41549b5e25fSSatish Balay 
416*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
417*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
4186c6c5352SBarry Smith   PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
41949b5e25fSSatish Balay   PetscFunctionReturn(0);
42049b5e25fSSatish Balay }
42149b5e25fSSatish Balay 
4224a2ae208SSatish Balay #undef __FUNCT__
4234a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
42449b5e25fSSatish Balay int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
42549b5e25fSSatish Balay {
42649b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
42787828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
42849b5e25fSSatish Balay   MatScalar       *v;
429831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
43049b5e25fSSatish Balay 
43149b5e25fSSatish Balay   PetscFunctionBegin;
43249b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
433*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
434*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
43549b5e25fSSatish Balay 
43649b5e25fSSatish Balay   v     = a->a;
43749b5e25fSSatish Balay   xb = x;
43849b5e25fSSatish Balay 
43949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
44049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
44149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
44249b5e25fSSatish Balay     ib = aj + *ai;
443831a3094SHong Zhang     jmin = 0;
4447fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
44549b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
44649b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
44749b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
44849b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
44949b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
450831a3094SHong Zhang       v += 25; jmin++;
4517fbae186SHong Zhang     }
452831a3094SHong Zhang     for (j=jmin; j<n; j++) {
45349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
45449b5e25fSSatish Balay       cval       = ib[j]*5;
45549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
45649b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
45749b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
45849b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
45949b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
46049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
46149b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
46249b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
46349b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
46449b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
46549b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
46649b5e25fSSatish Balay       v  += 25;
46749b5e25fSSatish Balay     }
46849b5e25fSSatish Balay     xb +=5; ai++;
46949b5e25fSSatish Balay   }
47049b5e25fSSatish Balay 
471*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
472*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
4736c6c5352SBarry Smith   PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
47449b5e25fSSatish Balay   PetscFunctionReturn(0);
47549b5e25fSSatish Balay }
47649b5e25fSSatish Balay 
47749b5e25fSSatish Balay 
4784a2ae208SSatish Balay #undef __FUNCT__
4794a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
48049b5e25fSSatish Balay int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
48149b5e25fSSatish Balay {
48249b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
48387828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
48449b5e25fSSatish Balay   MatScalar       *v;
485831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
48649b5e25fSSatish Balay 
48749b5e25fSSatish Balay   PetscFunctionBegin;
48849b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
489*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
490*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
49149b5e25fSSatish Balay 
49249b5e25fSSatish Balay   v     = a->a;
49349b5e25fSSatish Balay   xb = x;
49449b5e25fSSatish Balay 
49549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
49649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
49749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
49849b5e25fSSatish Balay     ib = aj + *ai;
499831a3094SHong Zhang     jmin = 0;
5007fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
50149b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
50249b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
50349b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
50449b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
50549b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
50649b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
507831a3094SHong Zhang       v += 36; jmin++;
5087fbae186SHong Zhang     }
509831a3094SHong Zhang     for (j=jmin; j<n; j++) {
51049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
51149b5e25fSSatish Balay       cval       = ib[j]*6;
51249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
51349b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
51449b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
51549b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
51649b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
51749b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
51849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
51949b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
52049b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
52149b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
52249b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
52349b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
52449b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
52549b5e25fSSatish Balay       v  += 36;
52649b5e25fSSatish Balay     }
52749b5e25fSSatish Balay     xb +=6; ai++;
52849b5e25fSSatish Balay   }
52949b5e25fSSatish Balay 
530*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
531*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
5326c6c5352SBarry Smith   PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
53349b5e25fSSatish Balay   PetscFunctionReturn(0);
53449b5e25fSSatish Balay }
5354a2ae208SSatish Balay #undef __FUNCT__
5364a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
53749b5e25fSSatish Balay int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
53849b5e25fSSatish Balay {
53949b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
54087828ca2SBarry Smith   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
54149b5e25fSSatish Balay   MatScalar       *v;
542831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
54349b5e25fSSatish Balay 
54449b5e25fSSatish Balay   PetscFunctionBegin;
54549b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
546*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
547*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
54849b5e25fSSatish Balay 
54949b5e25fSSatish Balay   v     = a->a;
55049b5e25fSSatish Balay   xb = x;
55149b5e25fSSatish Balay 
55249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
55349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
55449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
55549b5e25fSSatish Balay     ib = aj + *ai;
556831a3094SHong Zhang     jmin = 0;
5577fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
55849b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
55949b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
56049b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
56149b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
56249b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
56349b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
56449b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
565831a3094SHong Zhang       v += 49; jmin++;
5667fbae186SHong Zhang     }
567831a3094SHong Zhang     for (j=jmin; j<n; j++) {
56849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
56949b5e25fSSatish Balay       cval       = ib[j]*7;
57049b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
57149b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
57249b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
57349b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
57449b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
57549b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
57649b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
57749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
57849b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
57949b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
58049b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
58149b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
58249b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
58349b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
58449b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
58549b5e25fSSatish Balay       v  += 49;
58649b5e25fSSatish Balay     }
58749b5e25fSSatish Balay     xb +=7; ai++;
58849b5e25fSSatish Balay   }
589*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
590*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
5916c6c5352SBarry Smith   PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
59249b5e25fSSatish Balay   PetscFunctionReturn(0);
59349b5e25fSSatish Balay }
59449b5e25fSSatish Balay 
59549b5e25fSSatish Balay /*
59649b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
59749b5e25fSSatish Balay */
5984a2ae208SSatish Balay #undef __FUNCT__
5994a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
60049b5e25fSSatish Balay int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
60149b5e25fSSatish Balay {
60249b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
60387828ca2SBarry Smith   PetscScalar     *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6040b60a74dSHong Zhang   MatScalar       *v;
605066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
6060b60a74dSHong Zhang   int             ncols,k;
60749b5e25fSSatish Balay 
60849b5e25fSSatish Balay   PetscFunctionBegin;
60949b5e25fSSatish Balay   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
610*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
611*1ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
61249b5e25fSSatish Balay 
61349b5e25fSSatish Balay   aj   = a->j;
61449b5e25fSSatish Balay   v    = a->a;
61549b5e25fSSatish Balay   ii   = a->i;
61649b5e25fSSatish Balay 
61749b5e25fSSatish Balay   if (!a->mult_work) {
61887828ca2SBarry Smith     ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
61949b5e25fSSatish Balay   }
62049b5e25fSSatish Balay   work = a->mult_work;
62149b5e25fSSatish Balay 
62249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
62349b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
62449b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
62549b5e25fSSatish Balay 
62649b5e25fSSatish Balay     /* upper triangular part */
62749b5e25fSSatish Balay     for (j=0; j<n; j++) {
62849b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
62949b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
63049b5e25fSSatish Balay       workt += bs;
63149b5e25fSSatish Balay     }
63249b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
63349b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
63449b5e25fSSatish Balay 
63549b5e25fSSatish Balay     /* strict lower triangular part */
636831a3094SHong Zhang     idx = aj+ii[0];
637831a3094SHong Zhang     if (*idx == i){
63896b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
639831a3094SHong Zhang     }
64096b9376eSHong Zhang 
64149b5e25fSSatish Balay     if (ncols > 0){
64249b5e25fSSatish Balay       workt = work;
64387828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
644831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
645831a3094SHong Zhang       for (j=0; j<n; j++) {
646831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
64749b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
64849b5e25fSSatish Balay         workt += bs;
64949b5e25fSSatish Balay       }
65049b5e25fSSatish Balay     }
65149b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
65249b5e25fSSatish Balay   }
65349b5e25fSSatish Balay 
654*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
655*1ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
6566c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
65749b5e25fSSatish Balay   PetscFunctionReturn(0);
65849b5e25fSSatish Balay }
65949b5e25fSSatish Balay 
6604a2ae208SSatish Balay #undef __FUNCT__
6614a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
66249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
66349b5e25fSSatish Balay {
66449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
66587828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1;
66649b5e25fSSatish Balay   MatScalar       *v;
667831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
66849b5e25fSSatish Balay 
66949b5e25fSSatish Balay   PetscFunctionBegin;
670*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
67149b5e25fSSatish Balay   if (yy != xx) {
672*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
67349b5e25fSSatish Balay   } else {
67449b5e25fSSatish Balay     y = x;
67549b5e25fSSatish Balay   }
67649b5e25fSSatish Balay   if (zz != yy) {
677a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
678*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
679a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
68049b5e25fSSatish Balay   } else {
68149b5e25fSSatish Balay     z = y;
68249b5e25fSSatish Balay   }
68349b5e25fSSatish Balay 
68449b5e25fSSatish Balay   v  = a->a;
68549b5e25fSSatish Balay   xb = x;
68649b5e25fSSatish Balay 
68749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
68849b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
68949b5e25fSSatish Balay     x1 = xb[0];
69049b5e25fSSatish Balay     ib = aj + *ai;
691831a3094SHong Zhang     jmin = 0;
692831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
693831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
694831a3094SHong Zhang     }
695831a3094SHong Zhang     for (j=jmin; j<n; j++) {
69649b5e25fSSatish Balay       cval    = *ib;
69749b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
69849b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
69949b5e25fSSatish Balay     }
70049b5e25fSSatish Balay     xb++; ai++;
70149b5e25fSSatish Balay   }
70249b5e25fSSatish Balay 
703*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
704*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
705*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
70649b5e25fSSatish Balay 
7076c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m));
70849b5e25fSSatish Balay   PetscFunctionReturn(0);
70949b5e25fSSatish Balay }
71049b5e25fSSatish Balay 
7114a2ae208SSatish Balay #undef __FUNCT__
7124a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
71349b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
71449b5e25fSSatish Balay {
71549b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
71687828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2;
71749b5e25fSSatish Balay   MatScalar       *v;
718831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
71949b5e25fSSatish Balay 
72049b5e25fSSatish Balay   PetscFunctionBegin;
721*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
72249b5e25fSSatish Balay   if (yy != xx) {
723*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
72449b5e25fSSatish Balay   } else {
72549b5e25fSSatish Balay     y = x;
72649b5e25fSSatish Balay   }
72749b5e25fSSatish Balay   if (zz != yy) {
728a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
729*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
730a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
73149b5e25fSSatish Balay   } else {
73249b5e25fSSatish Balay     z = y;
73349b5e25fSSatish Balay   }
73449b5e25fSSatish Balay 
73549b5e25fSSatish Balay   v     = a->a;
73649b5e25fSSatish Balay   xb = x;
73749b5e25fSSatish Balay 
73849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
73949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
74049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
74149b5e25fSSatish Balay     ib = aj + *ai;
742831a3094SHong Zhang     jmin = 0;
7437fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
74449b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
74549b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
746831a3094SHong Zhang       v += 4; jmin++;
7477fbae186SHong Zhang     }
748831a3094SHong Zhang     for (j=jmin; j<n; j++) {
74949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
75049b5e25fSSatish Balay       cval       = ib[j]*2;
75149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
75249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
75349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
75449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
75549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
75649b5e25fSSatish Balay       v  += 4;
75749b5e25fSSatish Balay     }
75849b5e25fSSatish Balay     xb +=2; ai++;
75949b5e25fSSatish Balay   }
76049b5e25fSSatish Balay 
761*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
762*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
763*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
76449b5e25fSSatish Balay 
7656c6c5352SBarry Smith   PetscLogFlops(4*(a->nz*2 - A->m));
76649b5e25fSSatish Balay   PetscFunctionReturn(0);
76749b5e25fSSatish Balay }
76849b5e25fSSatish Balay 
7694a2ae208SSatish Balay #undef __FUNCT__
7704a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
77149b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
77249b5e25fSSatish Balay {
77349b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
77487828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3;
77549b5e25fSSatish Balay   MatScalar       *v;
776831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
77749b5e25fSSatish Balay 
77849b5e25fSSatish Balay   PetscFunctionBegin;
779*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
78049b5e25fSSatish Balay   if (yy != xx) {
781*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
78249b5e25fSSatish Balay   } else {
78349b5e25fSSatish Balay     y = x;
78449b5e25fSSatish Balay   }
78549b5e25fSSatish Balay   if (zz != yy) {
786a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
787*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
788a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
78949b5e25fSSatish Balay   } else {
79049b5e25fSSatish Balay     z = y;
79149b5e25fSSatish Balay   }
79249b5e25fSSatish Balay 
79349b5e25fSSatish Balay   v     = a->a;
79449b5e25fSSatish Balay   xb = x;
79549b5e25fSSatish Balay 
79649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
79749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
79849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
79949b5e25fSSatish Balay     ib = aj + *ai;
800831a3094SHong Zhang     jmin = 0;
8017fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
80249b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
80349b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
80449b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
805831a3094SHong Zhang      v += 9; jmin++;
8067fbae186SHong Zhang     }
807831a3094SHong Zhang     for (j=jmin; j<n; j++) {
80849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
80949b5e25fSSatish Balay       cval       = ib[j]*3;
81049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
81149b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
81249b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
81349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
81449b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
81549b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
81649b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
81749b5e25fSSatish Balay       v  += 9;
81849b5e25fSSatish Balay     }
81949b5e25fSSatish Balay     xb +=3; ai++;
82049b5e25fSSatish Balay   }
82149b5e25fSSatish Balay 
822*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
823*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
824*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
82549b5e25fSSatish Balay 
8266c6c5352SBarry Smith   PetscLogFlops(18*(a->nz*2 - A->m));
82749b5e25fSSatish Balay   PetscFunctionReturn(0);
82849b5e25fSSatish Balay }
82949b5e25fSSatish Balay 
8304a2ae208SSatish Balay #undef __FUNCT__
8314a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
83249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
83349b5e25fSSatish Balay {
83449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
83587828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4;
83649b5e25fSSatish Balay   MatScalar       *v;
837831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
83849b5e25fSSatish Balay 
83949b5e25fSSatish Balay   PetscFunctionBegin;
840*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
84149b5e25fSSatish Balay   if (yy != xx) {
842*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
84349b5e25fSSatish Balay   } else {
84449b5e25fSSatish Balay     y = x;
84549b5e25fSSatish Balay   }
84649b5e25fSSatish Balay   if (zz != yy) {
847a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
848*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
849a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
85049b5e25fSSatish Balay   } else {
85149b5e25fSSatish Balay     z = y;
85249b5e25fSSatish Balay   }
85349b5e25fSSatish Balay 
85449b5e25fSSatish Balay   v     = a->a;
85549b5e25fSSatish Balay   xb = x;
85649b5e25fSSatish Balay 
85749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
85849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
85949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
86049b5e25fSSatish Balay     ib = aj + *ai;
861831a3094SHong Zhang     jmin = 0;
8627fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
86349b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
86449b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
86549b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
86649b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
867831a3094SHong Zhang       v += 16; jmin++;
8687fbae186SHong Zhang     }
869831a3094SHong Zhang     for (j=jmin; j<n; j++) {
87049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
87149b5e25fSSatish Balay       cval       = ib[j]*4;
87249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
87349b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
87449b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
87549b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
87649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
87749b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
87849b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
87949b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
88049b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
88149b5e25fSSatish Balay       v  += 16;
88249b5e25fSSatish Balay     }
88349b5e25fSSatish Balay     xb +=4; ai++;
88449b5e25fSSatish Balay   }
88549b5e25fSSatish Balay 
886*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
887*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
888*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
88949b5e25fSSatish Balay 
8906c6c5352SBarry Smith   PetscLogFlops(32*(a->nz*2 - A->m));
89149b5e25fSSatish Balay   PetscFunctionReturn(0);
89249b5e25fSSatish Balay }
89349b5e25fSSatish Balay 
8944a2ae208SSatish Balay #undef __FUNCT__
8954a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
89649b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
89749b5e25fSSatish Balay {
89849b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
89987828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5;
90049b5e25fSSatish Balay   MatScalar       *v;
901831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
90249b5e25fSSatish Balay 
90349b5e25fSSatish Balay   PetscFunctionBegin;
904*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
90549b5e25fSSatish Balay   if (yy != xx) {
906*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
90749b5e25fSSatish Balay   } else {
90849b5e25fSSatish Balay     y = x;
90949b5e25fSSatish Balay   }
91049b5e25fSSatish Balay   if (zz != yy) {
911a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
912*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
913a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
91449b5e25fSSatish Balay   } else {
91549b5e25fSSatish Balay     z = y;
91649b5e25fSSatish Balay   }
91749b5e25fSSatish Balay 
91849b5e25fSSatish Balay   v     = a->a;
91949b5e25fSSatish Balay   xb = x;
92049b5e25fSSatish Balay 
92149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
92249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
92349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
92449b5e25fSSatish Balay     ib = aj + *ai;
925831a3094SHong Zhang     jmin = 0;
9267fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
92749b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
92849b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
92949b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
93049b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
93149b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
932831a3094SHong Zhang       v += 25; jmin++;
9337fbae186SHong Zhang     }
934831a3094SHong Zhang     for (j=jmin; j<n; j++) {
93549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
93649b5e25fSSatish Balay       cval       = ib[j]*5;
93749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
93849b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
93949b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
94049b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
94149b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
94249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
94349b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
94449b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
94549b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
94649b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
94749b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
94849b5e25fSSatish Balay       v  += 25;
94949b5e25fSSatish Balay     }
95049b5e25fSSatish Balay     xb +=5; ai++;
95149b5e25fSSatish Balay   }
95249b5e25fSSatish Balay 
953*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
955*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
95649b5e25fSSatish Balay 
9576c6c5352SBarry Smith   PetscLogFlops(50*(a->nz*2 - A->m));
95849b5e25fSSatish Balay   PetscFunctionReturn(0);
95949b5e25fSSatish Balay }
9604a2ae208SSatish Balay #undef __FUNCT__
9614a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
96249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
96349b5e25fSSatish Balay {
96449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
96587828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
96649b5e25fSSatish Balay   MatScalar       *v;
967831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
96849b5e25fSSatish Balay 
96949b5e25fSSatish Balay   PetscFunctionBegin;
970*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
97149b5e25fSSatish Balay   if (yy != xx) {
972*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
97349b5e25fSSatish Balay   } else {
97449b5e25fSSatish Balay     y = x;
97549b5e25fSSatish Balay   }
97649b5e25fSSatish Balay   if (zz != yy) {
977a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
978*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
979a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
98049b5e25fSSatish Balay   } else {
98149b5e25fSSatish Balay     z = y;
98249b5e25fSSatish Balay   }
98349b5e25fSSatish Balay 
98449b5e25fSSatish Balay   v     = a->a;
98549b5e25fSSatish Balay   xb = x;
98649b5e25fSSatish Balay 
98749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
98849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
98949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
99049b5e25fSSatish Balay     ib = aj + *ai;
991831a3094SHong Zhang     jmin = 0;
9927fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
99349b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
99449b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
99549b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
99649b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
99749b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
99849b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
999831a3094SHong Zhang       v += 36; jmin++;
10007fbae186SHong Zhang     }
1001831a3094SHong Zhang     for (j=jmin; j<n; j++) {
100249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
100349b5e25fSSatish Balay       cval       = ib[j]*6;
100449b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
100549b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
100649b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
100749b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
100849b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
100949b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
101049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
101149b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
101249b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
101349b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
101449b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
101549b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
101649b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
101749b5e25fSSatish Balay       v  += 36;
101849b5e25fSSatish Balay     }
101949b5e25fSSatish Balay     xb +=6; ai++;
102049b5e25fSSatish Balay   }
102149b5e25fSSatish Balay 
1022*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1023*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1024*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
102549b5e25fSSatish Balay 
10266c6c5352SBarry Smith   PetscLogFlops(72*(a->nz*2 - A->m));
102749b5e25fSSatish Balay   PetscFunctionReturn(0);
102849b5e25fSSatish Balay }
102949b5e25fSSatish Balay 
10304a2ae208SSatish Balay #undef __FUNCT__
10314a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
103249b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
103349b5e25fSSatish Balay {
103449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
103587828ca2SBarry Smith   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
103649b5e25fSSatish Balay   MatScalar       *v;
1037831a3094SHong Zhang   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;
103849b5e25fSSatish Balay 
103949b5e25fSSatish Balay   PetscFunctionBegin;
1040*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
104149b5e25fSSatish Balay   if (yy != xx) {
1042*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
104349b5e25fSSatish Balay   } else {
104449b5e25fSSatish Balay     y = x;
104549b5e25fSSatish Balay   }
104649b5e25fSSatish Balay   if (zz != yy) {
1047a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1048*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1049a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
105049b5e25fSSatish Balay   } else {
105149b5e25fSSatish Balay     z = y;
105249b5e25fSSatish Balay   }
105349b5e25fSSatish Balay 
105449b5e25fSSatish Balay   v     = a->a;
105549b5e25fSSatish Balay   xb = x;
105649b5e25fSSatish Balay 
105749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
105849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
105949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
106049b5e25fSSatish Balay     ib = aj + *ai;
1061831a3094SHong Zhang     jmin = 0;
10627fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
106349b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
106449b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
106549b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
106649b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
106749b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
106849b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
106949b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1070831a3094SHong Zhang       v += 49; jmin++;
10717fbae186SHong Zhang     }
1072831a3094SHong Zhang     for (j=jmin; j<n; j++) {
107349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
107449b5e25fSSatish Balay       cval       = ib[j]*7;
107549b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
107649b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
107749b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
107849b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
107949b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
108049b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
108149b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
108249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
108349b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
108449b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
108549b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
108649b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
108749b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
108849b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
108949b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
109049b5e25fSSatish Balay       v  += 49;
109149b5e25fSSatish Balay     }
109249b5e25fSSatish Balay     xb +=7; ai++;
109349b5e25fSSatish Balay   }
109449b5e25fSSatish Balay 
1095*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1096*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1097*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
109849b5e25fSSatish Balay 
10996c6c5352SBarry Smith   PetscLogFlops(98*(a->nz*2 - A->m));
110049b5e25fSSatish Balay   PetscFunctionReturn(0);
110149b5e25fSSatish Balay }
110249b5e25fSSatish Balay 
11034a2ae208SSatish Balay #undef __FUNCT__
11044a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
110549b5e25fSSatish Balay int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
110649b5e25fSSatish Balay {
110749b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
110887828ca2SBarry Smith   PetscScalar     *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1109066653e3SSatish Balay   MatScalar       *v;
1110066653e3SSatish Balay   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
1111066653e3SSatish Balay   int             ncols,k;
111249b5e25fSSatish Balay 
111349b5e25fSSatish Balay   PetscFunctionBegin;
1114*1ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
111549b5e25fSSatish Balay   if (yy != xx) {
1116*1ebc52fbSHong Zhang     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
111749b5e25fSSatish Balay   } else {
111849b5e25fSSatish Balay     y = x;
111949b5e25fSSatish Balay   }
112049b5e25fSSatish Balay   if (zz != yy) {
1121a9d4b620SHong Zhang     /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */
1122*1ebc52fbSHong Zhang     ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
1123a9d4b620SHong Zhang     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
112449b5e25fSSatish Balay   } else {
112549b5e25fSSatish Balay     z = y;
112649b5e25fSSatish Balay   }
112749b5e25fSSatish Balay 
112849b5e25fSSatish Balay   aj   = a->j;
112949b5e25fSSatish Balay   v    = a->a;
113049b5e25fSSatish Balay   ii   = a->i;
113149b5e25fSSatish Balay 
113249b5e25fSSatish Balay   if (!a->mult_work) {
113387828ca2SBarry Smith     ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
113449b5e25fSSatish Balay   }
113549b5e25fSSatish Balay   work = a->mult_work;
113649b5e25fSSatish Balay 
113749b5e25fSSatish Balay 
113849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
113949b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
114049b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
114149b5e25fSSatish Balay 
114249b5e25fSSatish Balay     /* upper triangular part */
114349b5e25fSSatish Balay     for (j=0; j<n; j++) {
114449b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
114549b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
114649b5e25fSSatish Balay       workt += bs;
114749b5e25fSSatish Balay     }
114849b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
114949b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
115049b5e25fSSatish Balay 
115149b5e25fSSatish Balay     /* strict lower triangular part */
1152831a3094SHong Zhang     idx = aj+ii[0];
1153831a3094SHong Zhang     if (*idx == i){
115496b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1155831a3094SHong Zhang     }
115649b5e25fSSatish Balay     if (ncols > 0){
115749b5e25fSSatish Balay       workt = work;
115887828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1159831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1160831a3094SHong Zhang       for (j=0; j<n; j++) {
1161831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
1162831a3094SHong Zhang         /* idx++; */
116349b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
116449b5e25fSSatish Balay         workt += bs;
116549b5e25fSSatish Balay       }
116649b5e25fSSatish Balay     }
116749b5e25fSSatish Balay 
116849b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
116949b5e25fSSatish Balay   }
117049b5e25fSSatish Balay 
1171*1ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1172*1ebc52fbSHong Zhang   if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1173*1ebc52fbSHong Zhang   if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
117449b5e25fSSatish Balay 
11756c6c5352SBarry Smith   PetscLogFlops(2*(a->nz*2 - A->m));
117649b5e25fSSatish Balay   PetscFunctionReturn(0);
117749b5e25fSSatish Balay }
117849b5e25fSSatish Balay 
11794a2ae208SSatish Balay #undef __FUNCT__
11804a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_SeqSBAIJ"
118149b5e25fSSatish Balay int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
118249b5e25fSSatish Balay {
1183eeeff2ecSHong Zhang   int ierr;
1184eeeff2ecSHong Zhang 
118549b5e25fSSatish Balay   PetscFunctionBegin;
1186eeeff2ecSHong Zhang   ierr = MatMult(A,xx,zz);CHKERRQ(ierr);
1187eeeff2ecSHong Zhang   PetscFunctionReturn(0);
118849b5e25fSSatish Balay }
118949b5e25fSSatish Balay 
11904a2ae208SSatish Balay #undef __FUNCT__
11914a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ"
119249b5e25fSSatish Balay int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
119349b5e25fSSatish Balay 
119449b5e25fSSatish Balay {
1195eeeff2ecSHong Zhang   int ierr;
1196eeeff2ecSHong Zhang 
119749b5e25fSSatish Balay   PetscFunctionBegin;
1198eeeff2ecSHong Zhang   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1199eeeff2ecSHong Zhang   PetscFunctionReturn(0);
120049b5e25fSSatish Balay }
120149b5e25fSSatish Balay 
12024a2ae208SSatish Balay #undef __FUNCT__
12034a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1204268466fbSBarry Smith int MatScale_SeqSBAIJ(const PetscScalar *alpha,Mat inA)
120549b5e25fSSatish Balay {
120649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
12076c6c5352SBarry Smith   int         one = 1,totalnz = a->bs2*a->nz;
120849b5e25fSSatish Balay 
120949b5e25fSSatish Balay   PetscFunctionBegin;
1210268466fbSBarry Smith   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1211b0a32e0cSBarry Smith   PetscLogFlops(totalnz);
121249b5e25fSSatish Balay   PetscFunctionReturn(0);
121349b5e25fSSatish Balay }
121449b5e25fSSatish Balay 
12154a2ae208SSatish Balay #undef __FUNCT__
12164a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
121749b5e25fSSatish Balay int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
121849b5e25fSSatish Balay {
121949b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
122049b5e25fSSatish Balay   MatScalar   *v = a->a;
122149b5e25fSSatish Balay   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1222831a3094SHong Zhang   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1223831a3094SHong Zhang   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
122449b5e25fSSatish Balay 
122549b5e25fSSatish Balay   PetscFunctionBegin;
122649b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
122749b5e25fSSatish Balay     for (k=0; k<mbs; k++){
122849b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1229831a3094SHong Zhang       col  = aj + jmin;
1230831a3094SHong Zhang       if (*col == k){         /* diagonal block */
123149b5e25fSSatish Balay         for (i=0; i<bs2; i++){
123249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
123349b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
123449b5e25fSSatish Balay #else
123549b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
123649b5e25fSSatish Balay #endif
123749b5e25fSSatish Balay         }
1238831a3094SHong Zhang         jmin++;
1239831a3094SHong Zhang       }
1240831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
124149b5e25fSSatish Balay         for (i=0; i<bs2; i++){
124249b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
124349b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
124449b5e25fSSatish Balay #else
124549b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
124649b5e25fSSatish Balay #endif
124749b5e25fSSatish Balay         }
124849b5e25fSSatish Balay       }
124949b5e25fSSatish Balay     }
125049b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
125149b5e25fSSatish Balay 
125249b5e25fSSatish Balay   }  else if (type == NORM_INFINITY) { /* maximum row sum */
125382502324SSatish Balay     ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr);
125482502324SSatish Balay     ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr);
125582502324SSatish Balay     ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr);
125649b5e25fSSatish Balay     for (i=0; i<mbs; i++) {
125749b5e25fSSatish Balay       jl[i] = mbs; il[0] = 0;
125849b5e25fSSatish Balay     }
125949b5e25fSSatish Balay 
126049b5e25fSSatish Balay     *norm = 0.0;
126149b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
126249b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
126349b5e25fSSatish Balay 
126449b5e25fSSatish Balay       /*-- col sum --*/
126549b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1266831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1267831a3094SHong Zhang                   at step k */
126849b5e25fSSatish Balay       while (i<mbs){
126949b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
127049b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
127149b5e25fSSatish Balay         for (j=0; j<bs; j++){
127249b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
127349b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
127449b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
127549b5e25fSSatish Balay           }
127649b5e25fSSatish Balay         }
127749b5e25fSSatish Balay         /* update il, jl */
1278831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1279831a3094SHong Zhang         jmax = a->i[i+1];
128049b5e25fSSatish Balay         if (jmin < jmax){
128149b5e25fSSatish Balay           il[i] = jmin;
128249b5e25fSSatish Balay           j   = a->j[jmin];
128349b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
128449b5e25fSSatish Balay         }
128549b5e25fSSatish Balay         i = nexti;
128649b5e25fSSatish Balay       }
128749b5e25fSSatish Balay 
128849b5e25fSSatish Balay       /*-- row sum --*/
128949b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
129049b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
129149b5e25fSSatish Balay         for (j=0; j<bs; j++){
129249b5e25fSSatish Balay           v = a->a + i*bs2 + j;
129349b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
129449b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v);
129549b5e25fSSatish Balay             v   += bs;
129649b5e25fSSatish Balay           }
129749b5e25fSSatish Balay         }
129849b5e25fSSatish Balay       }
129949b5e25fSSatish Balay       /* add k_th block row to il, jl */
1300831a3094SHong Zhang       col = aj+jmin;
1301831a3094SHong Zhang       if (*col == k) jmin++;
130249b5e25fSSatish Balay       if (jmin < jmax){
130349b5e25fSSatish Balay         il[k] = jmin;
130449b5e25fSSatish Balay         j   = a->j[jmin];
130549b5e25fSSatish Balay         jl[k] = jl[j]; jl[j] = k;
130649b5e25fSSatish Balay       }
130749b5e25fSSatish Balay       for (j=0; j<bs; j++){
130849b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
130949b5e25fSSatish Balay       }
131049b5e25fSSatish Balay     }
131149b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
131249b5e25fSSatish Balay     ierr = PetscFree(jl);CHKERRQ(ierr);
131349b5e25fSSatish Balay     ierr = PetscFree(sum);CHKERRQ(ierr);
131449b5e25fSSatish Balay   } else {
1315347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
131649b5e25fSSatish Balay   }
131749b5e25fSSatish Balay   PetscFunctionReturn(0);
131849b5e25fSSatish Balay }
131949b5e25fSSatish Balay 
13204a2ae208SSatish Balay #undef __FUNCT__
13214a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
132249b5e25fSSatish Balay int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
132349b5e25fSSatish Balay {
132449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
132549b5e25fSSatish Balay   int          ierr;
132649b5e25fSSatish Balay 
132749b5e25fSSatish Balay   PetscFunctionBegin;
132849b5e25fSSatish Balay 
132949b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
13306c6c5352SBarry Smith   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1331ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1332ef511fbeSHong Zhang     PetscFunctionReturn(0);
133349b5e25fSSatish Balay   }
133449b5e25fSSatish Balay 
133549b5e25fSSatish Balay   /* if the a->i are the same */
133649b5e25fSSatish Balay   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
133749b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
133849b5e25fSSatish Balay     PetscFunctionReturn(0);
133949b5e25fSSatish Balay   }
134049b5e25fSSatish Balay 
134149b5e25fSSatish Balay   /* if a->j are the same */
13426c6c5352SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
134349b5e25fSSatish Balay   if (*flg == PETSC_FALSE) {
134449b5e25fSSatish Balay     PetscFunctionReturn(0);
134549b5e25fSSatish Balay   }
134649b5e25fSSatish Balay   /* if a->a are the same */
13476c6c5352SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
134849b5e25fSSatish Balay 
1349935af2e7SHong Zhang   PetscFunctionReturn(0);
135049b5e25fSSatish Balay }
135149b5e25fSSatish Balay 
13524a2ae208SSatish Balay #undef __FUNCT__
13534a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
135449b5e25fSSatish Balay int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
135549b5e25fSSatish Balay {
135649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
135749b5e25fSSatish Balay   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
135887828ca2SBarry Smith   PetscScalar  *x,zero = 0.0;
135949b5e25fSSatish Balay   MatScalar    *aa,*aa_j;
136049b5e25fSSatish Balay 
136149b5e25fSSatish Balay   PetscFunctionBegin;
136249b5e25fSSatish Balay   bs   = a->bs;
136382799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
136482799104SHong Zhang 
136549b5e25fSSatish Balay   aa   = a->a;
136649b5e25fSSatish Balay   ai   = a->i;
136749b5e25fSSatish Balay   aj   = a->j;
136849b5e25fSSatish Balay   ambs = a->mbs;
136949b5e25fSSatish Balay   bs2  = a->bs2;
137049b5e25fSSatish Balay 
137149b5e25fSSatish Balay   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1372*1ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
137349b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1374ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
137549b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
137649b5e25fSSatish Balay     j=ai[i];
137749b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
137849b5e25fSSatish Balay       row  = i*bs;
137949b5e25fSSatish Balay       aa_j = aa + j*bs2;
138082799104SHong Zhang       if (A->factor && bs==1){
138182799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
138282799104SHong Zhang       } else {
138349b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
138449b5e25fSSatish Balay       }
138549b5e25fSSatish Balay     }
138682799104SHong Zhang   }
138782799104SHong Zhang 
1388*1ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
138949b5e25fSSatish Balay   PetscFunctionReturn(0);
139049b5e25fSSatish Balay }
139149b5e25fSSatish Balay 
13924a2ae208SSatish Balay #undef __FUNCT__
13934a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
139449b5e25fSSatish Balay int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
139549b5e25fSSatish Balay {
139649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
139787828ca2SBarry Smith   PetscScalar  *l,*r,x,*li,*ri;
139849b5e25fSSatish Balay   MatScalar    *aa,*v;
1399066653e3SSatish Balay   int          ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;
140049b5e25fSSatish Balay 
140149b5e25fSSatish Balay   PetscFunctionBegin;
140249b5e25fSSatish Balay   ai  = a->i;
140349b5e25fSSatish Balay   aj  = a->j;
140449b5e25fSSatish Balay   aa  = a->a;
1405ef511fbeSHong Zhang   m   = A->m;
140649b5e25fSSatish Balay   bs  = a->bs;
140749b5e25fSSatish Balay   mbs = a->mbs;
140849b5e25fSSatish Balay   bs2 = a->bs2;
140949b5e25fSSatish Balay 
141049b5e25fSSatish Balay   if (ll != rr) {
1411347d480fSBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
141249b5e25fSSatish Balay   }
141349b5e25fSSatish Balay   if (ll) {
1414*1ebc52fbSHong Zhang     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
141549b5e25fSSatish Balay     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1416347d480fSBarry Smith     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
141749b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
141849b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
141949b5e25fSSatish Balay       li = l + i*bs;
142049b5e25fSSatish Balay       v  = aa + bs2*ai[i];
142149b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
142249b5e25fSSatish Balay         for (k=0; k<bs2; k++) {
142349b5e25fSSatish Balay           (*v++) *= li[k%bs];
142449b5e25fSSatish Balay         }
142549b5e25fSSatish Balay #ifdef CONT
142649b5e25fSSatish Balay         /* will be used to replace the above loop */
142749b5e25fSSatish Balay         ri = l + bs*aj[ai[i]+j];
142849b5e25fSSatish Balay         for (k=0; k<bs; k++) { /* column value */
142949b5e25fSSatish Balay           x = ri[k];
143049b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
143149b5e25fSSatish Balay         }
143249b5e25fSSatish Balay #endif
143349b5e25fSSatish Balay 
143449b5e25fSSatish Balay       }
143549b5e25fSSatish Balay     }
1436*1ebc52fbSHong Zhang     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
14376c6c5352SBarry Smith     PetscLogFlops(2*a->nz);
143849b5e25fSSatish Balay   }
143949b5e25fSSatish Balay   /* will be deleted */
144049b5e25fSSatish Balay   if (rr) {
1441*1ebc52fbSHong Zhang     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
144249b5e25fSSatish Balay     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1443347d480fSBarry Smith     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
144449b5e25fSSatish Balay     for (i=0; i<mbs; i++) { /* for each block row */
144549b5e25fSSatish Balay       M  = ai[i+1] - ai[i];
144649b5e25fSSatish Balay       v  = aa + bs2*ai[i];
144749b5e25fSSatish Balay       for (j=0; j<M; j++) { /* for each block */
144849b5e25fSSatish Balay         ri = r + bs*aj[ai[i]+j];
144949b5e25fSSatish Balay         for (k=0; k<bs; k++) {
145049b5e25fSSatish Balay           x = ri[k];
145149b5e25fSSatish Balay           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
145249b5e25fSSatish Balay         }
145349b5e25fSSatish Balay       }
145449b5e25fSSatish Balay     }
1455*1ebc52fbSHong Zhang     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
14566c6c5352SBarry Smith     PetscLogFlops(a->nz);
145749b5e25fSSatish Balay   }
145849b5e25fSSatish Balay   PetscFunctionReturn(0);
145949b5e25fSSatish Balay }
146049b5e25fSSatish Balay 
14614a2ae208SSatish Balay #undef __FUNCT__
14624a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
146349b5e25fSSatish Balay int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
146449b5e25fSSatish Balay {
146549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
146649b5e25fSSatish Balay 
146749b5e25fSSatish Balay   PetscFunctionBegin;
1468ef511fbeSHong Zhang   info->rows_global    = (double)A->m;
1469ef511fbeSHong Zhang   info->columns_global = (double)A->m;
1470ef511fbeSHong Zhang   info->rows_local     = (double)A->m;
1471ef511fbeSHong Zhang   info->columns_local  = (double)A->m;
147249b5e25fSSatish Balay   info->block_size     = a->bs2;
14736c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
14746c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
147549b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
147649b5e25fSSatish Balay   info->assemblies   = A->num_ass;
147749b5e25fSSatish Balay   info->mallocs      = a->reallocs;
147849b5e25fSSatish Balay   info->memory       = A->mem;
147949b5e25fSSatish Balay   if (A->factor) {
148049b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
148149b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
148249b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
148349b5e25fSSatish Balay   } else {
148449b5e25fSSatish Balay     info->fill_ratio_given  = 0;
148549b5e25fSSatish Balay     info->fill_ratio_needed = 0;
148649b5e25fSSatish Balay     info->factor_mallocs    = 0;
148749b5e25fSSatish Balay   }
148849b5e25fSSatish Balay   PetscFunctionReturn(0);
148949b5e25fSSatish Balay }
149049b5e25fSSatish Balay 
149149b5e25fSSatish Balay 
14924a2ae208SSatish Balay #undef __FUNCT__
14934a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
149449b5e25fSSatish Balay int MatZeroEntries_SeqSBAIJ(Mat A)
149549b5e25fSSatish Balay {
149649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
149749b5e25fSSatish Balay   int         ierr;
149849b5e25fSSatish Balay 
149949b5e25fSSatish Balay   PetscFunctionBegin;
150049b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
150149b5e25fSSatish Balay   PetscFunctionReturn(0);
150249b5e25fSSatish Balay }
1503dc354874SHong Zhang 
15044a2ae208SSatish Balay #undef __FUNCT__
15054a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ"
1506dc354874SHong Zhang int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1507dc354874SHong Zhang {
1508dc354874SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1509d0f6400bSHong Zhang   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1510c3fca9a7SHong Zhang   PetscReal    atmp;
1511273d9f13SBarry Smith   MatScalar    *aa;
151287828ca2SBarry Smith   PetscScalar  zero = 0.0,*x;
1513d0f6400bSHong Zhang   int          ncols,brow,bcol,krow,kcol;
1514dc354874SHong Zhang 
1515dc354874SHong Zhang   PetscFunctionBegin;
1516dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1517dc354874SHong Zhang   bs   = a->bs;
1518dc354874SHong Zhang   aa   = a->a;
1519dc354874SHong Zhang   ai   = a->i;
1520dc354874SHong Zhang   aj   = a->j;
152144117c81SHong Zhang   mbs = a->mbs;
1522dc354874SHong Zhang 
1523dc354874SHong Zhang   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1524*1ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1525dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1526ef511fbeSHong Zhang   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
152744117c81SHong Zhang   for (i=0; i<mbs; i++) {
1528d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1529d0f6400bSHong Zhang     brow  = bs*i;
153044117c81SHong Zhang     for (j=0; j<ncols; j++){
1531d0f6400bSHong Zhang       bcol = bs*(*aj);
153244117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1533d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
153444117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1535d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1536d0f6400bSHong Zhang           row = brow + krow;    /* row index */
153744117c81SHong Zhang           /* printf("val[%d,%d]: %g\n",row,col,atmp); */
1538c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1539c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
154044117c81SHong Zhang         }
154144117c81SHong Zhang       }
1542d0f6400bSHong Zhang       aj++;
1543dc354874SHong Zhang     }
1544dc354874SHong Zhang   }
1545*1ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1546dc354874SHong Zhang   PetscFunctionReturn(0);
1547dc354874SHong Zhang }
1548