xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 444d8c10678617a9a9ffeef2a5fa4ec026b99730)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
4c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
549b5e25fSSatish Balay #include "petscbt.h"
67c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h"
7f3da1532SBarry Smith #include "petscblaslapack.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
155d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
165d0c19d7SBarry Smith   const PetscInt *idx;
17521d7252SBarry Smith   PetscBT        table,table0;
18d94109b8SHong Zhang 
19d94109b8SHong Zhang   PetscFunctionBegin;
20e32f2f54SBarry Smith   if (ov < 0)  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
21d94109b8SHong Zhang   mbs = a->mbs;
22d94109b8SHong Zhang   ai  = a->i;
23d94109b8SHong Zhang   aj  = a->j;
24d0f46423SBarry Smith   bs  = A->rmap->bs;
25d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2613f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
27d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
28d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
29d94109b8SHong Zhang 
30d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
31d94109b8SHong Zhang     isz  = 0;
32d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
33d94109b8SHong Zhang 
34d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
35d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37d94109b8SHong Zhang 
38d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
39dbe03f88SHong Zhang     bcol_max = 0;
40d94109b8SHong Zhang     for (j=0; j<n ; ++j){
41d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
42e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
44dbe03f88SHong Zhang         nidx[isz++] = brow;
45dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
46dbe03f88SHong Zhang       }
47d94109b8SHong Zhang     }
48d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
49d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
50d94109b8SHong Zhang 
51d94109b8SHong Zhang     k = 0;
52d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
53d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
54d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
55efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
56d94109b8SHong Zhang 
57d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
58d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
59d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
60d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
61d94109b8SHong Zhang           for (l = start; l<end ; l++){
62d94109b8SHong Zhang             bcol = aj[l];
63d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
64d94109b8SHong Zhang           }
65d94109b8SHong Zhang           k++;
66d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
67d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
68d94109b8SHong Zhang           for (l = start; l<end ; l++){
69d94109b8SHong Zhang             bcol = aj[l];
70dbe03f88SHong Zhang             if (bcol > bcol_max) break;
71d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
72d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
73d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
74d94109b8SHong Zhang             }
75d94109b8SHong Zhang           }
76d94109b8SHong Zhang         }
77d94109b8SHong Zhang       }
78d94109b8SHong Zhang     } /* for each overlap */
79d94109b8SHong Zhang 
80d94109b8SHong Zhang     /* expand the Index Set */
81d94109b8SHong Zhang     for (j=0; j<isz; j++) {
82d94109b8SHong Zhang       for (k=0; k<bs; k++)
83d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
84d94109b8SHong Zhang     }
8570b3c8c7SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
86d94109b8SHong Zhang   }
87d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
915eee224dSHong Zhang   PetscFunctionReturn(0);
9249b5e25fSSatish Balay }
9349b5e25fSSatish Balay 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
964aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9749b5e25fSSatish Balay {
9849b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
996849ba73SBarry Smith   PetscErrorCode ierr;
10013f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10113f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1025d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1035d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10449b5e25fSSatish Balay   MatScalar      *mat_a;
10549b5e25fSSatish Balay   Mat            C;
106ace3abfcSBarry Smith   PetscBool      flag,sorted;
10749b5e25fSSatish Balay 
10849b5e25fSSatish Balay   PetscFunctionBegin;
109e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
11014ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
111e32f2f54SBarry Smith   if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11249b5e25fSSatish Balay 
11349b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11449b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11549b5e25fSSatish Balay 
11674ed9c26SBarry Smith   ierr  = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11774ed9c26SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11849b5e25fSSatish Balay   ssmap = smap;
11913f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
12049b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12149b5e25fSSatish Balay   /* determine lens of each row */
12249b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12349b5e25fSSatish Balay     kstart  = ai[irow[i]];
12449b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12549b5e25fSSatish Balay     lens[i] = 0;
12649b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12749b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12849b5e25fSSatish Balay           lens[i]++;
12949b5e25fSSatish Balay         }
13049b5e25fSSatish Balay       }
13149b5e25fSSatish Balay     }
13249b5e25fSSatish Balay   /* Create and fill new matrix */
13349b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13449b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13549b5e25fSSatish Balay 
136e32f2f54SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13713f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
138e7e72b3dSBarry Smith     if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13913f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14049b5e25fSSatish Balay     C = *B;
14149b5e25fSSatish Balay   } else {
1427adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
143f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1447adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
145ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14649b5e25fSSatish Balay   }
14749b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14849b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
14949b5e25fSSatish Balay     row    = irow[i];
15049b5e25fSSatish Balay     kstart = ai[row];
15149b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15249b5e25fSSatish Balay     mat_i  = c->i[i];
15349b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15449b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15549b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15649b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15749b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15849b5e25fSSatish Balay         *mat_j++ = tcol - 1;
15949b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16049b5e25fSSatish Balay         mat_a   += bs2;
16149b5e25fSSatish Balay         (*mat_ilen)++;
16249b5e25fSSatish Balay       }
16349b5e25fSSatish Balay     }
16449b5e25fSSatish Balay   }
16549b5e25fSSatish Balay 
16649b5e25fSSatish Balay   /* Free work space */
16749b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16849b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17149b5e25fSSatish Balay 
17249b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17349b5e25fSSatish Balay   *B = C;
17449b5e25fSSatish Balay   PetscFunctionReturn(0);
17549b5e25fSSatish Balay }
17649b5e25fSSatish Balay 
1774a2ae208SSatish Balay #undef __FUNCT__
1784a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1794aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
18049b5e25fSSatish Balay {
18149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18249b5e25fSSatish Balay   IS             is1;
1836849ba73SBarry Smith   PetscErrorCode ierr;
1845d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1855d0c19d7SBarry Smith   const PetscInt *irow;
18649b5e25fSSatish Balay 
18749b5e25fSSatish Balay   PetscFunctionBegin;
188e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
18949b5e25fSSatish Balay 
19049b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19149b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19449b5e25fSSatish Balay    and form the IS with compressed IS */
19574ed9c26SBarry Smith   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
19674ed9c26SBarry Smith   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
19749b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19849b5e25fSSatish Balay 
19949b5e25fSSatish Balay   count = 0;
20049b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
201e32f2f54SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20249b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20349b5e25fSSatish Balay   }
20449b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20570b3c8c7SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
20674ed9c26SBarry Smith   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
20749b5e25fSSatish Balay 
2084aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
20974ed9c26SBarry Smith   ierr = ISDestroy(is1);CHKERRQ(ierr);
21049b5e25fSSatish Balay   PetscFunctionReturn(0);
21149b5e25fSSatish Balay }
21249b5e25fSSatish Balay 
2134a2ae208SSatish Balay #undef __FUNCT__
2144a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21513f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21649b5e25fSSatish Balay {
2176849ba73SBarry Smith   PetscErrorCode ierr;
21813f74950SBarry Smith   PetscInt       i;
21949b5e25fSSatish Balay 
22049b5e25fSSatish Balay   PetscFunctionBegin;
22149b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22282502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22349b5e25fSSatish Balay   }
22449b5e25fSSatish Balay 
22549b5e25fSSatish Balay   for (i=0; i<n; i++) {
2264aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
22749b5e25fSSatish Balay   }
22849b5e25fSSatish Balay   PetscFunctionReturn(0);
22949b5e25fSSatish Balay }
23049b5e25fSSatish Balay 
23149b5e25fSSatish Balay /* -------------------------------------------------------*/
23249b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23349b5e25fSSatish Balay /* -------------------------------------------------------*/
23449b5e25fSSatish Balay 
2354a2ae208SSatish Balay #undef __FUNCT__
2364a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
237dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
23849b5e25fSSatish Balay {
23949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
24149b5e25fSSatish Balay   MatScalar      *v;
2426849ba73SBarry Smith   PetscErrorCode ierr;
24313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24549b5e25fSSatish Balay 
24649b5e25fSSatish Balay   PetscFunctionBegin;
2472dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2481ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2491ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25049b5e25fSSatish Balay 
25149b5e25fSSatish Balay   v     = a->a;
25249b5e25fSSatish Balay   xb = x;
25349b5e25fSSatish Balay 
25449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
25649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
25749b5e25fSSatish Balay     ib = aj + *ai;
258831a3094SHong Zhang     jmin = 0;
25998c9bda7SSatish Balay     nonzerorow += (n>0);
2607fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
26149b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
26249b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
263831a3094SHong Zhang       v += 4; jmin++;
2647fbae186SHong Zhang     }
265*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
266*444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
267831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
26949b5e25fSSatish Balay       cval       = ib[j]*2;
27049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
27149b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
27249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
27349b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
27449b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
27549b5e25fSSatish Balay       v  += 4;
27649b5e25fSSatish Balay     }
27749b5e25fSSatish Balay     xb +=2; ai++;
27849b5e25fSSatish Balay   }
27949b5e25fSSatish Balay 
2801ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2811ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
282dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
28349b5e25fSSatish Balay   PetscFunctionReturn(0);
28449b5e25fSSatish Balay }
28549b5e25fSSatish Balay 
2864a2ae208SSatish Balay #undef __FUNCT__
2874a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
288dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
28949b5e25fSSatish Balay {
29049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
29187828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
29249b5e25fSSatish Balay   MatScalar      *v;
2936849ba73SBarry Smith   PetscErrorCode ierr;
29413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29649b5e25fSSatish Balay 
29749b5e25fSSatish Balay   PetscFunctionBegin;
2982dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2991ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
30149b5e25fSSatish Balay 
30249b5e25fSSatish Balay   v    = a->a;
30349b5e25fSSatish Balay   xb   = x;
30449b5e25fSSatish Balay 
30549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
30849b5e25fSSatish Balay     ib = aj + *ai;
309831a3094SHong Zhang     jmin = 0;
31098c9bda7SSatish Balay     nonzerorow += (n>0);
3117fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
31249b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
31349b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
31449b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
315831a3094SHong Zhang       v += 9; jmin++;
3167fbae186SHong Zhang     }
317*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
318*444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
319831a3094SHong Zhang     for (j=jmin; j<n; j++) {
32049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
32149b5e25fSSatish Balay       cval       = ib[j]*3;
32249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
32349b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
32449b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
32549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32649b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
32749b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
32849b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
32949b5e25fSSatish Balay       v  += 9;
33049b5e25fSSatish Balay     }
33149b5e25fSSatish Balay     xb +=3; ai++;
33249b5e25fSSatish Balay   }
33349b5e25fSSatish Balay 
3341ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3351ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
336dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33749b5e25fSSatish Balay   PetscFunctionReturn(0);
33849b5e25fSSatish Balay }
33949b5e25fSSatish Balay 
3404a2ae208SSatish Balay #undef __FUNCT__
3414a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
342dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
34349b5e25fSSatish Balay {
34449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
34587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
34649b5e25fSSatish Balay   MatScalar      *v;
3476849ba73SBarry Smith   PetscErrorCode ierr;
34813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
35049b5e25fSSatish Balay 
35149b5e25fSSatish Balay   PetscFunctionBegin;
3522dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3531ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3541ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35549b5e25fSSatish Balay 
35649b5e25fSSatish Balay   v     = a->a;
35749b5e25fSSatish Balay   xb = x;
35849b5e25fSSatish Balay 
35949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
36049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
36149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
36249b5e25fSSatish Balay     ib = aj + *ai;
363831a3094SHong Zhang     jmin = 0;
36498c9bda7SSatish Balay     nonzerorow += (n>0);
3657fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
36649b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36749b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
36849b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
36949b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
370831a3094SHong Zhang       v += 16; jmin++;
3717fbae186SHong Zhang     }
372*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
373*444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
374831a3094SHong Zhang     for (j=jmin; j<n; j++) {
37549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37649b5e25fSSatish Balay       cval       = ib[j]*4;
37749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
37849b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
37949b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
38049b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
38149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
38249b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
38349b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
38449b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
38549b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38649b5e25fSSatish Balay       v  += 16;
38749b5e25fSSatish Balay     }
38849b5e25fSSatish Balay     xb +=4; ai++;
38949b5e25fSSatish Balay   }
39049b5e25fSSatish Balay 
3911ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3921ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
393dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
39449b5e25fSSatish Balay   PetscFunctionReturn(0);
39549b5e25fSSatish Balay }
39649b5e25fSSatish Balay 
3974a2ae208SSatish Balay #undef __FUNCT__
3984a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
399dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
40049b5e25fSSatish Balay {
40149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
40287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
40349b5e25fSSatish Balay   MatScalar      *v;
4046849ba73SBarry Smith   PetscErrorCode ierr;
40513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
40698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
40749b5e25fSSatish Balay 
40849b5e25fSSatish Balay   PetscFunctionBegin;
4092dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4101ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4111ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
41249b5e25fSSatish Balay 
41349b5e25fSSatish Balay   v     = a->a;
41449b5e25fSSatish Balay   xb = x;
41549b5e25fSSatish Balay 
41649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
41849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
41949b5e25fSSatish Balay     ib = aj + *ai;
420831a3094SHong Zhang     jmin = 0;
42198c9bda7SSatish Balay     nonzerorow += (n>0);
4227fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
42349b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
42449b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
42549b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
42649b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42749b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
428831a3094SHong Zhang       v += 25; jmin++;
4297fbae186SHong Zhang     }
430*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
431*444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
432831a3094SHong Zhang     for (j=jmin; j<n; j++) {
43349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
43449b5e25fSSatish Balay       cval       = ib[j]*5;
43549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
43649b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
43749b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
43849b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
43949b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
44049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
44149b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
44249b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
44349b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
44449b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
44549b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
44649b5e25fSSatish Balay       v  += 25;
44749b5e25fSSatish Balay     }
44849b5e25fSSatish Balay     xb +=5; ai++;
44949b5e25fSSatish Balay   }
45049b5e25fSSatish Balay 
4511ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4521ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
453dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
45449b5e25fSSatish Balay   PetscFunctionReturn(0);
45549b5e25fSSatish Balay }
45649b5e25fSSatish Balay 
45749b5e25fSSatish Balay 
4584a2ae208SSatish Balay #undef __FUNCT__
4594a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
460dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
46149b5e25fSSatish Balay {
46249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
46387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
46449b5e25fSSatish Balay   MatScalar      *v;
4656849ba73SBarry Smith   PetscErrorCode ierr;
46613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
46798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
46849b5e25fSSatish Balay 
46949b5e25fSSatish Balay   PetscFunctionBegin;
4702dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4711ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4721ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
47349b5e25fSSatish Balay 
47449b5e25fSSatish Balay   v     = a->a;
47549b5e25fSSatish Balay   xb = x;
47649b5e25fSSatish Balay 
47749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
47949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
48049b5e25fSSatish Balay     ib = aj + *ai;
481831a3094SHong Zhang     jmin = 0;
48298c9bda7SSatish Balay     nonzerorow += (n>0);
4837fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
48449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
48549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
48649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
48749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
48849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
48949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
490831a3094SHong Zhang       v += 36; jmin++;
4917fbae186SHong Zhang     }
492*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
493*444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
494831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49649b5e25fSSatish Balay       cval       = ib[j]*6;
49749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
49849b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
49949b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
50049b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
50149b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
50249b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
50349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50449b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
50549b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
50649b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
50749b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
50849b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
50949b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
51049b5e25fSSatish Balay       v  += 36;
51149b5e25fSSatish Balay     }
51249b5e25fSSatish Balay     xb +=6; ai++;
51349b5e25fSSatish Balay   }
51449b5e25fSSatish Balay 
5151ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5161ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
517dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
51849b5e25fSSatish Balay   PetscFunctionReturn(0);
51949b5e25fSSatish Balay }
5204a2ae208SSatish Balay #undef __FUNCT__
5214a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
522dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
52349b5e25fSSatish Balay {
52449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
52587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
52649b5e25fSSatish Balay   MatScalar      *v;
5276849ba73SBarry Smith   PetscErrorCode ierr;
52813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
52998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
53049b5e25fSSatish Balay 
53149b5e25fSSatish Balay   PetscFunctionBegin;
5322dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5331ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
53549b5e25fSSatish Balay 
53649b5e25fSSatish Balay   v     = a->a;
53749b5e25fSSatish Balay   xb = x;
53849b5e25fSSatish Balay 
53949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
54049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
54149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
54249b5e25fSSatish Balay     ib = aj + *ai;
543831a3094SHong Zhang     jmin = 0;
54498c9bda7SSatish Balay     nonzerorow += (n>0);
5457fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
54649b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
54749b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
54849b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
54949b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
55049b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
55149b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
55249b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
553831a3094SHong Zhang       v += 49; jmin++;
5547fbae186SHong Zhang     }
555*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
556*444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
557831a3094SHong Zhang     for (j=jmin; j<n; j++) {
55849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
55949b5e25fSSatish Balay       cval       = ib[j]*7;
56049b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
56149b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
56249b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
56349b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
56449b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
56549b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
56649b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
56749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
56849b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
56949b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
57049b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
57149b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
57249b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
57349b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
57449b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
57549b5e25fSSatish Balay       v  += 49;
57649b5e25fSSatish Balay     }
57749b5e25fSSatish Balay     xb +=7; ai++;
57849b5e25fSSatish Balay   }
5791ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5801ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
581dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
58249b5e25fSSatish Balay   PetscFunctionReturn(0);
58349b5e25fSSatish Balay }
58449b5e25fSSatish Balay 
58549b5e25fSSatish Balay /*
58649b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
58749b5e25fSSatish Balay */
5884a2ae208SSatish Balay #undef __FUNCT__
5894a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
590dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
59149b5e25fSSatish Balay {
59249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
59387828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5940b60a74dSHong Zhang   MatScalar      *v;
595dfbe8321SBarry Smith   PetscErrorCode ierr;
596d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
59798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
59849b5e25fSSatish Balay 
59949b5e25fSSatish Balay   PetscFunctionBegin;
6002dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6011ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6021ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
60349b5e25fSSatish Balay 
60449b5e25fSSatish Balay   aj   = a->j;
60549b5e25fSSatish Balay   v    = a->a;
60649b5e25fSSatish Balay   ii   = a->i;
60749b5e25fSSatish Balay 
60849b5e25fSSatish Balay   if (!a->mult_work) {
609d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
61049b5e25fSSatish Balay   }
61149b5e25fSSatish Balay   work = a->mult_work;
61249b5e25fSSatish Balay 
61349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61449b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
61549b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
61698c9bda7SSatish Balay     nonzerorow += (n>0);
61749b5e25fSSatish Balay 
61849b5e25fSSatish Balay     /* upper triangular part */
61949b5e25fSSatish Balay     for (j=0; j<n; j++) {
62049b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
62149b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
62249b5e25fSSatish Balay       workt += bs;
62349b5e25fSSatish Balay     }
62449b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
62549b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
62649b5e25fSSatish Balay 
62749b5e25fSSatish Balay     /* strict lower triangular part */
628831a3094SHong Zhang     idx = aj+ii[0];
629831a3094SHong Zhang     if (*idx == i){
63096b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
631831a3094SHong Zhang     }
63296b9376eSHong Zhang 
63349b5e25fSSatish Balay     if (ncols > 0){
63449b5e25fSSatish Balay       workt = work;
63587828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
636831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
637831a3094SHong Zhang       for (j=0; j<n; j++) {
638831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
63949b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
64049b5e25fSSatish Balay         workt += bs;
64149b5e25fSSatish Balay       }
64249b5e25fSSatish Balay     }
64349b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
64449b5e25fSSatish Balay   }
64549b5e25fSSatish Balay 
6461ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6471ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
648dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
64949b5e25fSSatish Balay   PetscFunctionReturn(0);
65049b5e25fSSatish Balay }
65149b5e25fSSatish Balay 
6524a2ae208SSatish Balay #undef __FUNCT__
6534a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
654dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
65549b5e25fSSatish Balay {
65649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
657bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
65849b5e25fSSatish Balay   MatScalar      *v;
6596849ba73SBarry Smith   PetscErrorCode ierr;
66013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
66198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
66249b5e25fSSatish Balay 
66349b5e25fSSatish Balay   PetscFunctionBegin;
664343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
6651ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6661ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
66749b5e25fSSatish Balay   v  = a->a;
66849b5e25fSSatish Balay   xb = x;
66949b5e25fSSatish Balay 
67049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
67149b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
67249b5e25fSSatish Balay     x1 = xb[0];
67349b5e25fSSatish Balay     ib = aj + *ai;
674831a3094SHong Zhang     jmin = 0;
67598c9bda7SSatish Balay     nonzerorow += (n>0);
676831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
677831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
678831a3094SHong Zhang     }
679831a3094SHong Zhang     for (j=jmin; j<n; j++) {
68049b5e25fSSatish Balay       cval    = *ib;
68149b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
68249b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
68349b5e25fSSatish Balay     }
68449b5e25fSSatish Balay     xb++; ai++;
68549b5e25fSSatish Balay   }
68649b5e25fSSatish Balay 
6871ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
688bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68949b5e25fSSatish Balay 
690dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
69149b5e25fSSatish Balay   PetscFunctionReturn(0);
69249b5e25fSSatish Balay }
69349b5e25fSSatish Balay 
6944a2ae208SSatish Balay #undef __FUNCT__
6954a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
696dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
69749b5e25fSSatish Balay {
69849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
699bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
70049b5e25fSSatish Balay   MatScalar      *v;
7016849ba73SBarry Smith   PetscErrorCode ierr;
70213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
70398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
70449b5e25fSSatish Balay 
70549b5e25fSSatish Balay   PetscFunctionBegin;
706343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7071ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7081ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70949b5e25fSSatish Balay 
71049b5e25fSSatish Balay   v  = a->a;
71149b5e25fSSatish Balay   xb = x;
71249b5e25fSSatish Balay 
71349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
71449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
71549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
71649b5e25fSSatish Balay     ib = aj + *ai;
717831a3094SHong Zhang     jmin = 0;
71898c9bda7SSatish Balay     nonzerorow += (n>0);
7197fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
72049b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
72149b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
722831a3094SHong Zhang       v += 4; jmin++;
7237fbae186SHong Zhang     }
724*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
725*444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
726831a3094SHong Zhang     for (j=jmin; j<n; j++) {
72749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
72849b5e25fSSatish Balay       cval       = ib[j]*2;
72949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
73049b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
73149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
73249b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
73349b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
73449b5e25fSSatish Balay       v  += 4;
73549b5e25fSSatish Balay     }
73649b5e25fSSatish Balay     xb +=2; ai++;
73749b5e25fSSatish Balay   }
7381ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
739bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
74049b5e25fSSatish Balay 
741dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
74249b5e25fSSatish Balay   PetscFunctionReturn(0);
74349b5e25fSSatish Balay }
74449b5e25fSSatish Balay 
7454a2ae208SSatish Balay #undef __FUNCT__
7464a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
747dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
74849b5e25fSSatish Balay {
74949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
750bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
75149b5e25fSSatish Balay   MatScalar      *v;
7526849ba73SBarry Smith   PetscErrorCode ierr;
75313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
75498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
75549b5e25fSSatish Balay 
75649b5e25fSSatish Balay   PetscFunctionBegin;
757343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7581ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7591ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
76049b5e25fSSatish Balay 
76149b5e25fSSatish Balay   v     = a->a;
76249b5e25fSSatish Balay   xb = x;
76349b5e25fSSatish Balay 
76449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
76549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
76649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
76749b5e25fSSatish Balay     ib = aj + *ai;
768831a3094SHong Zhang     jmin = 0;
76998c9bda7SSatish Balay     nonzerorow += (n>0);
7707fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
77149b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
77249b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
77349b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
774831a3094SHong Zhang      v += 9; jmin++;
7757fbae186SHong Zhang     }
776*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
777*444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
778831a3094SHong Zhang     for (j=jmin; j<n; j++) {
77949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
78049b5e25fSSatish Balay       cval       = ib[j]*3;
78149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
78249b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
78349b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
78449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
78549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
78649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
78749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
78849b5e25fSSatish Balay       v  += 9;
78949b5e25fSSatish Balay     }
79049b5e25fSSatish Balay     xb +=3; ai++;
79149b5e25fSSatish Balay   }
79249b5e25fSSatish Balay 
7931ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
794bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
79549b5e25fSSatish Balay 
796dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
79749b5e25fSSatish Balay   PetscFunctionReturn(0);
79849b5e25fSSatish Balay }
79949b5e25fSSatish Balay 
8004a2ae208SSatish Balay #undef __FUNCT__
8014a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
802dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
80349b5e25fSSatish Balay {
80449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
805bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
80649b5e25fSSatish Balay   MatScalar      *v;
8076849ba73SBarry Smith   PetscErrorCode ierr;
80813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
80998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
81049b5e25fSSatish Balay 
81149b5e25fSSatish Balay   PetscFunctionBegin;
812343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8131ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8141ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
81549b5e25fSSatish Balay 
81649b5e25fSSatish Balay   v     = a->a;
81749b5e25fSSatish Balay   xb = x;
81849b5e25fSSatish Balay 
81949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
82049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
82149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
82249b5e25fSSatish Balay     ib = aj + *ai;
823831a3094SHong Zhang     jmin = 0;
82498c9bda7SSatish Balay     nonzerorow += (n>0);
8257fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
82649b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
82749b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
82849b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
82949b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
830831a3094SHong Zhang       v += 16; jmin++;
8317fbae186SHong Zhang     }
832*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
833*444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
834831a3094SHong Zhang     for (j=jmin; j<n; j++) {
83549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
83649b5e25fSSatish Balay       cval       = ib[j]*4;
83749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
83849b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
83949b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
84049b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
84149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
84249b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
84349b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
84449b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
84549b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
84649b5e25fSSatish Balay       v  += 16;
84749b5e25fSSatish Balay     }
84849b5e25fSSatish Balay     xb +=4; ai++;
84949b5e25fSSatish Balay   }
85049b5e25fSSatish Balay 
8511ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
852bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
85349b5e25fSSatish Balay 
854dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
85549b5e25fSSatish Balay   PetscFunctionReturn(0);
85649b5e25fSSatish Balay }
85749b5e25fSSatish Balay 
8584a2ae208SSatish Balay #undef __FUNCT__
8594a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
860dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
86149b5e25fSSatish Balay {
86249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
863bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
86449b5e25fSSatish Balay   MatScalar      *v;
8656849ba73SBarry Smith   PetscErrorCode ierr;
86613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
86798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
86849b5e25fSSatish Balay 
86949b5e25fSSatish Balay   PetscFunctionBegin;
870343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8711ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8721ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
87349b5e25fSSatish Balay 
87449b5e25fSSatish Balay   v     = a->a;
87549b5e25fSSatish Balay   xb = x;
87649b5e25fSSatish Balay 
87749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
87849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
87949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
88049b5e25fSSatish Balay     ib = aj + *ai;
881831a3094SHong Zhang     jmin = 0;
88298c9bda7SSatish Balay     nonzerorow += (n>0);
8837fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
88449b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
88549b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
88649b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
88749b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
88849b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
889831a3094SHong Zhang       v += 25; jmin++;
8907fbae186SHong Zhang     }
891*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
892*444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
893831a3094SHong Zhang     for (j=jmin; j<n; j++) {
89449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
89549b5e25fSSatish Balay       cval       = ib[j]*5;
89649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
89749b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
89849b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
89949b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
90049b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
90149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
90249b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
90349b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
90449b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
90549b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
90649b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
90749b5e25fSSatish Balay       v  += 25;
90849b5e25fSSatish Balay     }
90949b5e25fSSatish Balay     xb +=5; ai++;
91049b5e25fSSatish Balay   }
91149b5e25fSSatish Balay 
9121ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
913bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
91449b5e25fSSatish Balay 
915dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
91649b5e25fSSatish Balay   PetscFunctionReturn(0);
91749b5e25fSSatish Balay }
9184a2ae208SSatish Balay #undef __FUNCT__
9194a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
920dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
92149b5e25fSSatish Balay {
92249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
923bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
92449b5e25fSSatish Balay   MatScalar      *v;
9256849ba73SBarry Smith   PetscErrorCode ierr;
92613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
92798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
92849b5e25fSSatish Balay 
92949b5e25fSSatish Balay   PetscFunctionBegin;
930343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9311ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9321ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
93349b5e25fSSatish Balay 
93449b5e25fSSatish Balay   v     = a->a;
93549b5e25fSSatish Balay   xb = x;
93649b5e25fSSatish Balay 
93749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
93849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
93949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
94049b5e25fSSatish Balay     ib = aj + *ai;
941831a3094SHong Zhang     jmin = 0;
94298c9bda7SSatish Balay     nonzerorow += (n>0);
9437fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
94449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
94549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
94649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
94749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
94849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
94949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
950831a3094SHong Zhang       v += 36; jmin++;
9517fbae186SHong Zhang     }
952*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
953*444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
954831a3094SHong Zhang     for (j=jmin; j<n; j++) {
95549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
95649b5e25fSSatish Balay       cval       = ib[j]*6;
95749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
95849b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
95949b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
96049b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
96149b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
96249b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
96349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96449b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
96549b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
96649b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
96749b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
96849b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
96949b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
97049b5e25fSSatish Balay       v  += 36;
97149b5e25fSSatish Balay     }
97249b5e25fSSatish Balay     xb +=6; ai++;
97349b5e25fSSatish Balay   }
97449b5e25fSSatish Balay 
9751ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
976bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
97749b5e25fSSatish Balay 
978dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
97949b5e25fSSatish Balay   PetscFunctionReturn(0);
98049b5e25fSSatish Balay }
98149b5e25fSSatish Balay 
9824a2ae208SSatish Balay #undef __FUNCT__
9834a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
984dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
98549b5e25fSSatish Balay {
98649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
987bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
98849b5e25fSSatish Balay   MatScalar      *v;
9896849ba73SBarry Smith   PetscErrorCode ierr;
99013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
99198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
99249b5e25fSSatish Balay 
99349b5e25fSSatish Balay   PetscFunctionBegin;
994343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9951ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9961ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
99749b5e25fSSatish Balay 
99849b5e25fSSatish Balay   v     = a->a;
99949b5e25fSSatish Balay   xb = x;
100049b5e25fSSatish Balay 
100149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
100249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
100349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
100449b5e25fSSatish Balay     ib = aj + *ai;
1005831a3094SHong Zhang     jmin = 0;
100698c9bda7SSatish Balay     nonzerorow += (n>0);
10077fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
100849b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
100949b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
101049b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
101149b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
101249b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
101349b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
101449b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1015831a3094SHong Zhang       v += 49; jmin++;
10167fbae186SHong Zhang     }
1017*444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1018*444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1019831a3094SHong Zhang     for (j=jmin; j<n; j++) {
102049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
102149b5e25fSSatish Balay       cval       = ib[j]*7;
102249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
102349b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
102449b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
102549b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
102649b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
102749b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
102849b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
102949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
103049b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
103149b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
103249b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
103349b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
103449b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
103549b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
103649b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
103749b5e25fSSatish Balay       v  += 49;
103849b5e25fSSatish Balay     }
103949b5e25fSSatish Balay     xb +=7; ai++;
104049b5e25fSSatish Balay   }
104149b5e25fSSatish Balay 
10421ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1043bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
104449b5e25fSSatish Balay 
1045dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
104649b5e25fSSatish Balay   PetscFunctionReturn(0);
104749b5e25fSSatish Balay }
104849b5e25fSSatish Balay 
10494a2ae208SSatish Balay #undef __FUNCT__
10504a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1051dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
105249b5e25fSSatish Balay {
105349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1054bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1055066653e3SSatish Balay   MatScalar      *v;
1056dfbe8321SBarry Smith   PetscErrorCode ierr;
1057d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
105898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
105949b5e25fSSatish Balay 
106049b5e25fSSatish Balay   PetscFunctionBegin;
1061343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
10621ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10631ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
106449b5e25fSSatish Balay 
106549b5e25fSSatish Balay   aj   = a->j;
106649b5e25fSSatish Balay   v    = a->a;
106749b5e25fSSatish Balay   ii   = a->i;
106849b5e25fSSatish Balay 
106949b5e25fSSatish Balay   if (!a->mult_work) {
1070d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
107149b5e25fSSatish Balay   }
107249b5e25fSSatish Balay   work = a->mult_work;
107349b5e25fSSatish Balay 
107449b5e25fSSatish Balay 
107549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
107649b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
107749b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
107898c9bda7SSatish Balay     nonzerorow += (n>0);
107949b5e25fSSatish Balay 
108049b5e25fSSatish Balay     /* upper triangular part */
108149b5e25fSSatish Balay     for (j=0; j<n; j++) {
108249b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
108349b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
108449b5e25fSSatish Balay       workt += bs;
108549b5e25fSSatish Balay     }
108649b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
108749b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
108849b5e25fSSatish Balay 
108949b5e25fSSatish Balay     /* strict lower triangular part */
1090831a3094SHong Zhang     idx = aj+ii[0];
1091831a3094SHong Zhang     if (*idx == i){
109296b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1093831a3094SHong Zhang     }
109449b5e25fSSatish Balay     if (ncols > 0){
109549b5e25fSSatish Balay       workt = work;
109687828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1097831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1098831a3094SHong Zhang       for (j=0; j<n; j++) {
1099831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
110049b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
110149b5e25fSSatish Balay         workt += bs;
110249b5e25fSSatish Balay       }
110349b5e25fSSatish Balay     }
110449b5e25fSSatish Balay 
110549b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
110649b5e25fSSatish Balay   }
110749b5e25fSSatish Balay 
11081ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1109bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
111049b5e25fSSatish Balay 
1111dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
111249b5e25fSSatish Balay   PetscFunctionReturn(0);
111349b5e25fSSatish Balay }
111449b5e25fSSatish Balay 
11154a2ae208SSatish Balay #undef __FUNCT__
11164a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1117f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
111849b5e25fSSatish Balay {
111949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1120f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1121efee365bSSatish Balay   PetscErrorCode ierr;
11220805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
112349b5e25fSSatish Balay 
112449b5e25fSSatish Balay   PetscFunctionBegin;
1125f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1126efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
112749b5e25fSSatish Balay   PetscFunctionReturn(0);
112849b5e25fSSatish Balay }
112949b5e25fSSatish Balay 
11304a2ae208SSatish Balay #undef __FUNCT__
11314a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1132dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
113349b5e25fSSatish Balay {
113449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
113549b5e25fSSatish Balay   MatScalar      *v = a->a;
113649b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1137d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11386849ba73SBarry Smith   PetscErrorCode ierr;
113913f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
114049b5e25fSSatish Balay 
114149b5e25fSSatish Balay   PetscFunctionBegin;
114249b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
114349b5e25fSSatish Balay     for (k=0; k<mbs; k++){
114449b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1145831a3094SHong Zhang       col  = aj + jmin;
1146831a3094SHong Zhang       if (*col == k){         /* diagonal block */
114749b5e25fSSatish Balay         for (i=0; i<bs2; i++){
114849b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
114949b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
115049b5e25fSSatish Balay #else
115149b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
115249b5e25fSSatish Balay #endif
115349b5e25fSSatish Balay         }
1154831a3094SHong Zhang         jmin++;
1155831a3094SHong Zhang       }
1156831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
115749b5e25fSSatish Balay         for (i=0; i<bs2; i++){
115849b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
115949b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
116049b5e25fSSatish Balay #else
116149b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
116249b5e25fSSatish Balay #endif
116349b5e25fSSatish Balay         }
116449b5e25fSSatish Balay       }
116549b5e25fSSatish Balay     }
116649b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11670b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
116874ed9c26SBarry Smith     ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
11690b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11700b8dc8d2SHong Zhang     il[0] = 0;
117149b5e25fSSatish Balay 
117249b5e25fSSatish Balay     *norm = 0.0;
117349b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
117449b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
117549b5e25fSSatish Balay       /*-- col sum --*/
117649b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1177831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1178831a3094SHong Zhang                   at step k */
117949b5e25fSSatish Balay       while (i<mbs){
118049b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
118149b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
118249b5e25fSSatish Balay         for (j=0; j<bs; j++){
118349b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
118449b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
118549b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
118649b5e25fSSatish Balay           }
118749b5e25fSSatish Balay         }
118849b5e25fSSatish Balay         /* update il, jl */
1189831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1190831a3094SHong Zhang         jmax = a->i[i+1];
119149b5e25fSSatish Balay         if (jmin < jmax){
119249b5e25fSSatish Balay           il[i] = jmin;
119349b5e25fSSatish Balay           j   = a->j[jmin];
119449b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
119549b5e25fSSatish Balay         }
119649b5e25fSSatish Balay         i = nexti;
119749b5e25fSSatish Balay       }
119849b5e25fSSatish Balay       /*-- row sum --*/
119949b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
120049b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
120149b5e25fSSatish Balay         for (j=0; j<bs; j++){
120249b5e25fSSatish Balay           v = a->a + i*bs2 + j;
120349b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
12040b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
120549b5e25fSSatish Balay           }
120649b5e25fSSatish Balay         }
120749b5e25fSSatish Balay       }
120849b5e25fSSatish Balay       /* add k_th block row to il, jl */
1209831a3094SHong Zhang       col = aj+jmin;
1210831a3094SHong Zhang       if (*col == k) jmin++;
121149b5e25fSSatish Balay       if (jmin < jmax){
121249b5e25fSSatish Balay         il[k] = jmin;
12130b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
121449b5e25fSSatish Balay       }
121549b5e25fSSatish Balay       for (j=0; j<bs; j++){
121649b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
121749b5e25fSSatish Balay       }
121849b5e25fSSatish Balay     }
121974ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
122049b5e25fSSatish Balay   } else {
1221e32f2f54SBarry Smith     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
122249b5e25fSSatish Balay   }
122349b5e25fSSatish Balay   PetscFunctionReturn(0);
122449b5e25fSSatish Balay }
122549b5e25fSSatish Balay 
12264a2ae208SSatish Balay #undef __FUNCT__
12274a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1228ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
122949b5e25fSSatish Balay {
123049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1231dfbe8321SBarry Smith   PetscErrorCode ierr;
123249b5e25fSSatish Balay 
123349b5e25fSSatish Balay   PetscFunctionBegin;
123449b5e25fSSatish Balay 
123549b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1236d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1237ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1238ef511fbeSHong Zhang     PetscFunctionReturn(0);
123949b5e25fSSatish Balay   }
124049b5e25fSSatish Balay 
124149b5e25fSSatish Balay   /* if the a->i are the same */
124213f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1243abc0a331SBarry Smith   if (!*flg) {
124449b5e25fSSatish Balay     PetscFunctionReturn(0);
124549b5e25fSSatish Balay   }
124649b5e25fSSatish Balay 
124749b5e25fSSatish Balay   /* if a->j are the same */
124813f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1249abc0a331SBarry Smith   if (!*flg) {
125049b5e25fSSatish Balay     PetscFunctionReturn(0);
125149b5e25fSSatish Balay   }
125249b5e25fSSatish Balay   /* if a->a are the same */
1253d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1254935af2e7SHong Zhang   PetscFunctionReturn(0);
125549b5e25fSSatish Balay }
125649b5e25fSSatish Balay 
12574a2ae208SSatish Balay #undef __FUNCT__
12584a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1259dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
126049b5e25fSSatish Balay {
126149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1262dfbe8321SBarry Smith   PetscErrorCode ierr;
126313f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
126487828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
126549b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
126649b5e25fSSatish Balay 
126749b5e25fSSatish Balay   PetscFunctionBegin;
1268d0f46423SBarry Smith   bs   = A->rmap->bs;
1269e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
127082799104SHong Zhang 
127149b5e25fSSatish Balay   aa   = a->a;
127249b5e25fSSatish Balay   ai   = a->i;
127349b5e25fSSatish Balay   aj   = a->j;
127449b5e25fSSatish Balay   ambs = a->mbs;
127549b5e25fSSatish Balay   bs2  = a->bs2;
127649b5e25fSSatish Balay 
12772dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12781ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
127949b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1280e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
128149b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
128249b5e25fSSatish Balay     j=ai[i];
128349b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
128449b5e25fSSatish Balay       row  = i*bs;
128549b5e25fSSatish Balay       aa_j = aa + j*bs2;
1286d5f3da31SBarry Smith       if (A->factortype && bs==1){
128782799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
128882799104SHong Zhang       } else {
128949b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
129049b5e25fSSatish Balay       }
129149b5e25fSSatish Balay     }
129282799104SHong Zhang   }
129382799104SHong Zhang 
12941ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
129549b5e25fSSatish Balay   PetscFunctionReturn(0);
129649b5e25fSSatish Balay }
129749b5e25fSSatish Balay 
12984a2ae208SSatish Balay #undef __FUNCT__
12994a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1300dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
130149b5e25fSSatish Balay {
130249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
13035e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
130449b5e25fSSatish Balay   MatScalar      *aa,*v;
1305dfbe8321SBarry Smith   PetscErrorCode ierr;
13065e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1307ace3abfcSBarry Smith   PetscBool      flg;
130849b5e25fSSatish Balay 
130949b5e25fSSatish Balay   PetscFunctionBegin;
1310b3bf805bSHong Zhang   if (ll != rr){
1311b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1312e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1313b3bf805bSHong Zhang   }
1314b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
131549b5e25fSSatish Balay   ai  = a->i;
131649b5e25fSSatish Balay   aj  = a->j;
131749b5e25fSSatish Balay   aa  = a->a;
1318d0f46423SBarry Smith   m   = A->rmap->N;
1319d0f46423SBarry Smith   bs  = A->rmap->bs;
132049b5e25fSSatish Balay   mbs = a->mbs;
132149b5e25fSSatish Balay   bs2 = a->bs2;
132249b5e25fSSatish Balay 
13231ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
132449b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1325e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
132649b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
132749b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
132849b5e25fSSatish Balay     li = l + i*bs;
132949b5e25fSSatish Balay     v  = aa + bs2*ai[i];
133049b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
133149b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13325e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
133349b5e25fSSatish Balay         x = ri[k];
133449b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
133549b5e25fSSatish Balay       }
133649b5e25fSSatish Balay     }
133749b5e25fSSatish Balay   }
13381ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1339dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
134049b5e25fSSatish Balay   PetscFunctionReturn(0);
134149b5e25fSSatish Balay }
134249b5e25fSSatish Balay 
13434a2ae208SSatish Balay #undef __FUNCT__
13444a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1345dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
134649b5e25fSSatish Balay {
134749b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
134849b5e25fSSatish Balay 
134949b5e25fSSatish Balay   PetscFunctionBegin;
135049b5e25fSSatish Balay   info->block_size     = a->bs2;
1351ceed8ce5SJed Brown   info->nz_allocated   = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */
13526c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
135349b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
135449b5e25fSSatish Balay   info->assemblies   = A->num_ass;
13558e58a170SBarry Smith   info->mallocs      = A->info.mallocs;
13567adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1357d5f3da31SBarry Smith   if (A->factortype) {
135849b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
135949b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
136049b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
136149b5e25fSSatish Balay   } else {
136249b5e25fSSatish Balay     info->fill_ratio_given  = 0;
136349b5e25fSSatish Balay     info->fill_ratio_needed = 0;
136449b5e25fSSatish Balay     info->factor_mallocs    = 0;
136549b5e25fSSatish Balay   }
136649b5e25fSSatish Balay   PetscFunctionReturn(0);
136749b5e25fSSatish Balay }
136849b5e25fSSatish Balay 
136949b5e25fSSatish Balay 
13704a2ae208SSatish Balay #undef __FUNCT__
13714a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1372dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
137349b5e25fSSatish Balay {
137449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1375dfbe8321SBarry Smith   PetscErrorCode ierr;
137649b5e25fSSatish Balay 
137749b5e25fSSatish Balay   PetscFunctionBegin;
137849b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
137949b5e25fSSatish Balay   PetscFunctionReturn(0);
138049b5e25fSSatish Balay }
1381dc354874SHong Zhang 
13824a2ae208SSatish Balay #undef __FUNCT__
1383985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1384985db425SBarry Smith /*
1385985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1386985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1387985db425SBarry Smith */
1388985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1389dc354874SHong Zhang {
1390dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1391dfbe8321SBarry Smith   PetscErrorCode ierr;
139213f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1393c3fca9a7SHong Zhang   PetscReal      atmp;
1394273d9f13SBarry Smith   MatScalar      *aa;
1395985db425SBarry Smith   PetscScalar    *x;
139613f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1397dc354874SHong Zhang 
1398dc354874SHong Zhang   PetscFunctionBegin;
1399e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1400e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1401d0f46423SBarry Smith   bs   = A->rmap->bs;
1402dc354874SHong Zhang   aa   = a->a;
1403dc354874SHong Zhang   ai   = a->i;
1404dc354874SHong Zhang   aj   = a->j;
140544117c81SHong Zhang   mbs = a->mbs;
1406dc354874SHong Zhang 
1407985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14081ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1409dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1410e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
141144117c81SHong Zhang   for (i=0; i<mbs; i++) {
1412d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1413d0f6400bSHong Zhang     brow  = bs*i;
141444117c81SHong Zhang     for (j=0; j<ncols; j++){
1415d0f6400bSHong Zhang       bcol = bs*(*aj);
141644117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1417d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
141844117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1419d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1420d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1421c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1422c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
142344117c81SHong Zhang         }
142444117c81SHong Zhang       }
1425d0f6400bSHong Zhang       aj++;
1426dc354874SHong Zhang     }
1427dc354874SHong Zhang   }
14281ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1429dc354874SHong Zhang   PetscFunctionReturn(0);
1430dc354874SHong Zhang }
1431