xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision ceed8ce595262cd19e3c8ab4548d189500d2242c)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
4c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
549b5e25fSSatish Balay #include "petscbt.h"
67c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h"
7f3da1532SBarry Smith #include "petscblaslapack.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
155d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
165d0c19d7SBarry Smith   const PetscInt *idx;
17521d7252SBarry Smith   PetscBT        table,table0;
18d94109b8SHong Zhang 
19d94109b8SHong Zhang   PetscFunctionBegin;
20e32f2f54SBarry Smith   if (ov < 0)  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
21d94109b8SHong Zhang   mbs = a->mbs;
22d94109b8SHong Zhang   ai  = a->i;
23d94109b8SHong Zhang   aj  = a->j;
24d0f46423SBarry Smith   bs  = A->rmap->bs;
25d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2613f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
27d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
28d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
29d94109b8SHong Zhang 
30d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
31d94109b8SHong Zhang     isz  = 0;
32d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
33d94109b8SHong Zhang 
34d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
35d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37d94109b8SHong Zhang 
38d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
39dbe03f88SHong Zhang     bcol_max = 0;
40d94109b8SHong Zhang     for (j=0; j<n ; ++j){
41d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
42e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
44dbe03f88SHong Zhang         nidx[isz++] = brow;
45dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
46dbe03f88SHong Zhang       }
47d94109b8SHong Zhang     }
48d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
49d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
50d94109b8SHong Zhang 
51d94109b8SHong Zhang     k = 0;
52d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
53d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
54d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
55efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
56d94109b8SHong Zhang 
57d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
58d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
59d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
60d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
61d94109b8SHong Zhang           for (l = start; l<end ; l++){
62d94109b8SHong Zhang             bcol = aj[l];
63d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
64d94109b8SHong Zhang           }
65d94109b8SHong Zhang           k++;
66d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
67d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
68d94109b8SHong Zhang           for (l = start; l<end ; l++){
69d94109b8SHong Zhang             bcol = aj[l];
70dbe03f88SHong Zhang             if (bcol > bcol_max) break;
71d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
72d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
73d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
74d94109b8SHong Zhang             }
75d94109b8SHong Zhang           }
76d94109b8SHong Zhang         }
77d94109b8SHong Zhang       }
78d94109b8SHong Zhang     } /* for each overlap */
79d94109b8SHong Zhang 
80d94109b8SHong Zhang     /* expand the Index Set */
81d94109b8SHong Zhang     for (j=0; j<isz; j++) {
82d94109b8SHong Zhang       for (k=0; k<bs; k++)
83d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
84d94109b8SHong Zhang     }
85d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
86d94109b8SHong Zhang   }
87d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
915eee224dSHong Zhang   PetscFunctionReturn(0);
9249b5e25fSSatish Balay }
9349b5e25fSSatish Balay 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
964aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9749b5e25fSSatish Balay {
9849b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
996849ba73SBarry Smith   PetscErrorCode ierr;
10013f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10113f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1025d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1035d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10449b5e25fSSatish Balay   MatScalar      *mat_a;
10549b5e25fSSatish Balay   Mat            C;
10614ca34e6SBarry Smith   PetscTruth     flag,sorted;
10749b5e25fSSatish Balay 
10849b5e25fSSatish Balay   PetscFunctionBegin;
109e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
11014ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
111e32f2f54SBarry Smith   if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11249b5e25fSSatish Balay 
11349b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11449b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11549b5e25fSSatish Balay 
11674ed9c26SBarry Smith   ierr  = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11774ed9c26SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11849b5e25fSSatish Balay   ssmap = smap;
11913f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
12049b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12149b5e25fSSatish Balay   /* determine lens of each row */
12249b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12349b5e25fSSatish Balay     kstart  = ai[irow[i]];
12449b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12549b5e25fSSatish Balay     lens[i] = 0;
12649b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12749b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12849b5e25fSSatish Balay           lens[i]++;
12949b5e25fSSatish Balay         }
13049b5e25fSSatish Balay       }
13149b5e25fSSatish Balay     }
13249b5e25fSSatish Balay   /* Create and fill new matrix */
13349b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13449b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13549b5e25fSSatish Balay 
136e32f2f54SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13713f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
138e7e72b3dSBarry Smith     if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13913f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14049b5e25fSSatish Balay     C = *B;
14149b5e25fSSatish Balay   } else {
1427adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
143f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1447adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
145ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14649b5e25fSSatish Balay   }
14749b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14849b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
14949b5e25fSSatish Balay     row    = irow[i];
15049b5e25fSSatish Balay     kstart = ai[row];
15149b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15249b5e25fSSatish Balay     mat_i  = c->i[i];
15349b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15449b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15549b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15649b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15749b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15849b5e25fSSatish Balay         *mat_j++ = tcol - 1;
15949b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16049b5e25fSSatish Balay         mat_a   += bs2;
16149b5e25fSSatish Balay         (*mat_ilen)++;
16249b5e25fSSatish Balay       }
16349b5e25fSSatish Balay     }
16449b5e25fSSatish Balay   }
16549b5e25fSSatish Balay 
16649b5e25fSSatish Balay   /* Free work space */
16749b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16849b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17149b5e25fSSatish Balay 
17249b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17349b5e25fSSatish Balay   *B = C;
17449b5e25fSSatish Balay   PetscFunctionReturn(0);
17549b5e25fSSatish Balay }
17649b5e25fSSatish Balay 
1774a2ae208SSatish Balay #undef __FUNCT__
1784a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1794aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
18049b5e25fSSatish Balay {
18149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18249b5e25fSSatish Balay   IS             is1;
1836849ba73SBarry Smith   PetscErrorCode ierr;
1845d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1855d0c19d7SBarry Smith   const PetscInt *irow;
18649b5e25fSSatish Balay 
18749b5e25fSSatish Balay   PetscFunctionBegin;
188e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
18949b5e25fSSatish Balay 
19049b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19149b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19449b5e25fSSatish Balay    and form the IS with compressed IS */
19574ed9c26SBarry Smith   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
19674ed9c26SBarry Smith   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
19749b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19849b5e25fSSatish Balay 
19949b5e25fSSatish Balay   count = 0;
20049b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
201e32f2f54SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20249b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20349b5e25fSSatish Balay   }
20449b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20574ed9c26SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20674ed9c26SBarry Smith   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
20749b5e25fSSatish Balay 
2084aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
20974ed9c26SBarry Smith   ierr = ISDestroy(is1);CHKERRQ(ierr);
21049b5e25fSSatish Balay   PetscFunctionReturn(0);
21149b5e25fSSatish Balay }
21249b5e25fSSatish Balay 
2134a2ae208SSatish Balay #undef __FUNCT__
2144a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21513f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21649b5e25fSSatish Balay {
2176849ba73SBarry Smith   PetscErrorCode ierr;
21813f74950SBarry Smith   PetscInt       i;
21949b5e25fSSatish Balay 
22049b5e25fSSatish Balay   PetscFunctionBegin;
22149b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22282502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22349b5e25fSSatish Balay   }
22449b5e25fSSatish Balay 
22549b5e25fSSatish Balay   for (i=0; i<n; i++) {
2264aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
22749b5e25fSSatish Balay   }
22849b5e25fSSatish Balay   PetscFunctionReturn(0);
22949b5e25fSSatish Balay }
23049b5e25fSSatish Balay 
23149b5e25fSSatish Balay /* -------------------------------------------------------*/
23249b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23349b5e25fSSatish Balay /* -------------------------------------------------------*/
23449b5e25fSSatish Balay 
2354a2ae208SSatish Balay #undef __FUNCT__
2364a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
237dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
23849b5e25fSSatish Balay {
23949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
24149b5e25fSSatish Balay   MatScalar      *v;
2426849ba73SBarry Smith   PetscErrorCode ierr;
24313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24549b5e25fSSatish Balay 
24649b5e25fSSatish Balay   PetscFunctionBegin;
2472dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2481ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2491ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25049b5e25fSSatish Balay 
25149b5e25fSSatish Balay   v     = a->a;
25249b5e25fSSatish Balay   xb = x;
25349b5e25fSSatish Balay 
25449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
25649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
25749b5e25fSSatish Balay     ib = aj + *ai;
258831a3094SHong Zhang     jmin = 0;
25998c9bda7SSatish Balay     nonzerorow += (n>0);
2607fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
26149b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
26249b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
263831a3094SHong Zhang       v += 4; jmin++;
2647fbae186SHong Zhang     }
265831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
26749b5e25fSSatish Balay       cval       = ib[j]*2;
26849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
26949b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
27049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
27149b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
27249b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
27349b5e25fSSatish Balay       v  += 4;
27449b5e25fSSatish Balay     }
27549b5e25fSSatish Balay     xb +=2; ai++;
27649b5e25fSSatish Balay   }
27749b5e25fSSatish Balay 
2781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
280dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
28149b5e25fSSatish Balay   PetscFunctionReturn(0);
28249b5e25fSSatish Balay }
28349b5e25fSSatish Balay 
2844a2ae208SSatish Balay #undef __FUNCT__
2854a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
286dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
28749b5e25fSSatish Balay {
28849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
28987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
29049b5e25fSSatish Balay   MatScalar      *v;
2916849ba73SBarry Smith   PetscErrorCode ierr;
29213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29449b5e25fSSatish Balay 
29549b5e25fSSatish Balay   PetscFunctionBegin;
2962dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2971ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
29949b5e25fSSatish Balay 
30049b5e25fSSatish Balay   v    = a->a;
30149b5e25fSSatish Balay   xb   = x;
30249b5e25fSSatish Balay 
30349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
30649b5e25fSSatish Balay     ib = aj + *ai;
307831a3094SHong Zhang     jmin = 0;
30898c9bda7SSatish Balay     nonzerorow += (n>0);
3097fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
31049b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
31149b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
31249b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
313831a3094SHong Zhang       v += 9; jmin++;
3147fbae186SHong Zhang     }
315831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31749b5e25fSSatish Balay       cval       = ib[j]*3;
31849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
31949b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
32049b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
32149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32249b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
32349b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
32449b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
32549b5e25fSSatish Balay       v  += 9;
32649b5e25fSSatish Balay     }
32749b5e25fSSatish Balay     xb +=3; ai++;
32849b5e25fSSatish Balay   }
32949b5e25fSSatish Balay 
3301ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3311ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
332dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33349b5e25fSSatish Balay   PetscFunctionReturn(0);
33449b5e25fSSatish Balay }
33549b5e25fSSatish Balay 
3364a2ae208SSatish Balay #undef __FUNCT__
3374a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
338dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
33949b5e25fSSatish Balay {
34049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
34187828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
34249b5e25fSSatish Balay   MatScalar      *v;
3436849ba73SBarry Smith   PetscErrorCode ierr;
34413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
34649b5e25fSSatish Balay 
34749b5e25fSSatish Balay   PetscFunctionBegin;
3482dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3491ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35149b5e25fSSatish Balay 
35249b5e25fSSatish Balay   v     = a->a;
35349b5e25fSSatish Balay   xb = x;
35449b5e25fSSatish Balay 
35549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
35649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
35749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
35849b5e25fSSatish Balay     ib = aj + *ai;
359831a3094SHong Zhang     jmin = 0;
36098c9bda7SSatish Balay     nonzerorow += (n>0);
3617fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
36249b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36349b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
36449b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
36549b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
366831a3094SHong Zhang       v += 16; jmin++;
3677fbae186SHong Zhang     }
368831a3094SHong Zhang     for (j=jmin; j<n; j++) {
36949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37049b5e25fSSatish Balay       cval       = ib[j]*4;
37149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
37249b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
37349b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
37449b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
37549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
37649b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
37749b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
37849b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
37949b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38049b5e25fSSatish Balay       v  += 16;
38149b5e25fSSatish Balay     }
38249b5e25fSSatish Balay     xb +=4; ai++;
38349b5e25fSSatish Balay   }
38449b5e25fSSatish Balay 
3851ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3861ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
387dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
38849b5e25fSSatish Balay   PetscFunctionReturn(0);
38949b5e25fSSatish Balay }
39049b5e25fSSatish Balay 
3914a2ae208SSatish Balay #undef __FUNCT__
3924a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
393dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
39449b5e25fSSatish Balay {
39549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
39687828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
39749b5e25fSSatish Balay   MatScalar      *v;
3986849ba73SBarry Smith   PetscErrorCode ierr;
39913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
40098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
40149b5e25fSSatish Balay 
40249b5e25fSSatish Balay   PetscFunctionBegin;
4032dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4041ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4051ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
40649b5e25fSSatish Balay 
40749b5e25fSSatish Balay   v     = a->a;
40849b5e25fSSatish Balay   xb = x;
40949b5e25fSSatish Balay 
41049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
41249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
41349b5e25fSSatish Balay     ib = aj + *ai;
414831a3094SHong Zhang     jmin = 0;
41598c9bda7SSatish Balay     nonzerorow += (n>0);
4167fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
41749b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
41849b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
41949b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
42049b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42149b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
422831a3094SHong Zhang       v += 25; jmin++;
4237fbae186SHong Zhang     }
424831a3094SHong Zhang     for (j=jmin; j<n; j++) {
42549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
42649b5e25fSSatish Balay       cval       = ib[j]*5;
42749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
42849b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
42949b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
43049b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
43149b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
43249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
43349b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
43449b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
43549b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
43649b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
43749b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
43849b5e25fSSatish Balay       v  += 25;
43949b5e25fSSatish Balay     }
44049b5e25fSSatish Balay     xb +=5; ai++;
44149b5e25fSSatish Balay   }
44249b5e25fSSatish Balay 
4431ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4441ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
445dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
44649b5e25fSSatish Balay   PetscFunctionReturn(0);
44749b5e25fSSatish Balay }
44849b5e25fSSatish Balay 
44949b5e25fSSatish Balay 
4504a2ae208SSatish Balay #undef __FUNCT__
4514a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
452dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
45349b5e25fSSatish Balay {
45449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
45587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
45649b5e25fSSatish Balay   MatScalar      *v;
4576849ba73SBarry Smith   PetscErrorCode ierr;
45813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
45998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
46049b5e25fSSatish Balay 
46149b5e25fSSatish Balay   PetscFunctionBegin;
4622dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4631ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4641ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
46549b5e25fSSatish Balay 
46649b5e25fSSatish Balay   v     = a->a;
46749b5e25fSSatish Balay   xb = x;
46849b5e25fSSatish Balay 
46949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
47149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
47249b5e25fSSatish Balay     ib = aj + *ai;
473831a3094SHong Zhang     jmin = 0;
47498c9bda7SSatish Balay     nonzerorow += (n>0);
4757fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
47649b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
47749b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
47849b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
47949b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
48049b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
48149b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
482831a3094SHong Zhang       v += 36; jmin++;
4837fbae186SHong Zhang     }
484831a3094SHong Zhang     for (j=jmin; j<n; j++) {
48549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
48649b5e25fSSatish Balay       cval       = ib[j]*6;
48749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
48849b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
48949b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
49049b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
49149b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
49249b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
49349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
49449b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
49549b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
49649b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
49749b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
49849b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
49949b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
50049b5e25fSSatish Balay       v  += 36;
50149b5e25fSSatish Balay     }
50249b5e25fSSatish Balay     xb +=6; ai++;
50349b5e25fSSatish Balay   }
50449b5e25fSSatish Balay 
5051ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
507dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
50849b5e25fSSatish Balay   PetscFunctionReturn(0);
50949b5e25fSSatish Balay }
5104a2ae208SSatish Balay #undef __FUNCT__
5114a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
512dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
51349b5e25fSSatish Balay {
51449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
51587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
51649b5e25fSSatish Balay   MatScalar      *v;
5176849ba73SBarry Smith   PetscErrorCode ierr;
51813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
51998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
52049b5e25fSSatish Balay 
52149b5e25fSSatish Balay   PetscFunctionBegin;
5222dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5231ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5241ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
52549b5e25fSSatish Balay 
52649b5e25fSSatish Balay   v     = a->a;
52749b5e25fSSatish Balay   xb = x;
52849b5e25fSSatish Balay 
52949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
53049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
53149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
53249b5e25fSSatish Balay     ib = aj + *ai;
533831a3094SHong Zhang     jmin = 0;
53498c9bda7SSatish Balay     nonzerorow += (n>0);
5357fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
53649b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
53749b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
53849b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
53949b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
54049b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
54149b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
54249b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
543831a3094SHong Zhang       v += 49; jmin++;
5447fbae186SHong Zhang     }
545831a3094SHong Zhang     for (j=jmin; j<n; j++) {
54649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
54749b5e25fSSatish Balay       cval       = ib[j]*7;
54849b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
54949b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
55049b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
55149b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
55249b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
55349b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
55449b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
55549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
55649b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
55749b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
55849b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
55949b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
56049b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
56149b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
56249b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
56349b5e25fSSatish Balay       v  += 49;
56449b5e25fSSatish Balay     }
56549b5e25fSSatish Balay     xb +=7; ai++;
56649b5e25fSSatish Balay   }
5671ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5681ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
569dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
57049b5e25fSSatish Balay   PetscFunctionReturn(0);
57149b5e25fSSatish Balay }
57249b5e25fSSatish Balay 
57349b5e25fSSatish Balay /*
57449b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
57549b5e25fSSatish Balay */
5764a2ae208SSatish Balay #undef __FUNCT__
5774a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
578dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
57949b5e25fSSatish Balay {
58049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
58187828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5820b60a74dSHong Zhang   MatScalar      *v;
583dfbe8321SBarry Smith   PetscErrorCode ierr;
584d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
58598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
58649b5e25fSSatish Balay 
58749b5e25fSSatish Balay   PetscFunctionBegin;
5882dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5891ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
5901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
59149b5e25fSSatish Balay 
59249b5e25fSSatish Balay   aj   = a->j;
59349b5e25fSSatish Balay   v    = a->a;
59449b5e25fSSatish Balay   ii   = a->i;
59549b5e25fSSatish Balay 
59649b5e25fSSatish Balay   if (!a->mult_work) {
597d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
59849b5e25fSSatish Balay   }
59949b5e25fSSatish Balay   work = a->mult_work;
60049b5e25fSSatish Balay 
60149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
60249b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
60349b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
60498c9bda7SSatish Balay     nonzerorow += (n>0);
60549b5e25fSSatish Balay 
60649b5e25fSSatish Balay     /* upper triangular part */
60749b5e25fSSatish Balay     for (j=0; j<n; j++) {
60849b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
60949b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
61049b5e25fSSatish Balay       workt += bs;
61149b5e25fSSatish Balay     }
61249b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
61349b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
61449b5e25fSSatish Balay 
61549b5e25fSSatish Balay     /* strict lower triangular part */
616831a3094SHong Zhang     idx = aj+ii[0];
617831a3094SHong Zhang     if (*idx == i){
61896b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
619831a3094SHong Zhang     }
62096b9376eSHong Zhang 
62149b5e25fSSatish Balay     if (ncols > 0){
62249b5e25fSSatish Balay       workt = work;
62387828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
624831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
625831a3094SHong Zhang       for (j=0; j<n; j++) {
626831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
62749b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
62849b5e25fSSatish Balay         workt += bs;
62949b5e25fSSatish Balay       }
63049b5e25fSSatish Balay     }
63149b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
63249b5e25fSSatish Balay   }
63349b5e25fSSatish Balay 
6341ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6351ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
636dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
63749b5e25fSSatish Balay   PetscFunctionReturn(0);
63849b5e25fSSatish Balay }
63949b5e25fSSatish Balay 
6406a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6414a2ae208SSatish Balay #undef __FUNCT__
6424a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
643dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
64449b5e25fSSatish Balay {
64549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
646bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
64749b5e25fSSatish Balay   MatScalar      *v;
6486849ba73SBarry Smith   PetscErrorCode ierr;
64913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
65098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
65149b5e25fSSatish Balay 
65249b5e25fSSatish Balay   PetscFunctionBegin;
6536a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6541ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6551ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
65649b5e25fSSatish Balay   v  = a->a;
65749b5e25fSSatish Balay   xb = x;
65849b5e25fSSatish Balay 
65949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
66049b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
66149b5e25fSSatish Balay     x1 = xb[0];
66249b5e25fSSatish Balay     ib = aj + *ai;
663831a3094SHong Zhang     jmin = 0;
66498c9bda7SSatish Balay     nonzerorow += (n>0);
665831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
666831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
667831a3094SHong Zhang     }
668831a3094SHong Zhang     for (j=jmin; j<n; j++) {
66949b5e25fSSatish Balay       cval    = *ib;
67049b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
67149b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
67249b5e25fSSatish Balay     }
67349b5e25fSSatish Balay     xb++; ai++;
67449b5e25fSSatish Balay   }
67549b5e25fSSatish Balay 
6761ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
677bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
67849b5e25fSSatish Balay 
679dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
68049b5e25fSSatish Balay   PetscFunctionReturn(0);
68149b5e25fSSatish Balay }
68249b5e25fSSatish Balay 
6834a2ae208SSatish Balay #undef __FUNCT__
6844a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
685dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
68649b5e25fSSatish Balay {
68749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
688bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
68949b5e25fSSatish Balay   MatScalar      *v;
6906849ba73SBarry Smith   PetscErrorCode ierr;
69113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
69298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
69349b5e25fSSatish Balay 
69449b5e25fSSatish Balay   PetscFunctionBegin;
6956a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6961ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6971ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
69849b5e25fSSatish Balay 
69949b5e25fSSatish Balay   v  = a->a;
70049b5e25fSSatish Balay   xb = x;
70149b5e25fSSatish Balay 
70249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
70449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
70549b5e25fSSatish Balay     ib = aj + *ai;
706831a3094SHong Zhang     jmin = 0;
70798c9bda7SSatish Balay     nonzerorow += (n>0);
7087fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
70949b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
71049b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
711831a3094SHong Zhang       v += 4; jmin++;
7127fbae186SHong Zhang     }
713831a3094SHong Zhang     for (j=jmin; j<n; j++) {
71449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
71549b5e25fSSatish Balay       cval       = ib[j]*2;
71649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
71749b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
71849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
71949b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
72049b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
72149b5e25fSSatish Balay       v  += 4;
72249b5e25fSSatish Balay     }
72349b5e25fSSatish Balay     xb +=2; ai++;
72449b5e25fSSatish Balay   }
7251ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
726bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
72749b5e25fSSatish Balay 
728dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
72949b5e25fSSatish Balay   PetscFunctionReturn(0);
73049b5e25fSSatish Balay }
73149b5e25fSSatish Balay 
7324a2ae208SSatish Balay #undef __FUNCT__
7334a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
734dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
73549b5e25fSSatish Balay {
73649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
737bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
73849b5e25fSSatish Balay   MatScalar      *v;
7396849ba73SBarry Smith   PetscErrorCode ierr;
74013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
74198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
74249b5e25fSSatish Balay 
74349b5e25fSSatish Balay   PetscFunctionBegin;
7446a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7451ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7461ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
74749b5e25fSSatish Balay 
74849b5e25fSSatish Balay   v     = a->a;
74949b5e25fSSatish Balay   xb = x;
75049b5e25fSSatish Balay 
75149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
75249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
75349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
75449b5e25fSSatish Balay     ib = aj + *ai;
755831a3094SHong Zhang     jmin = 0;
75698c9bda7SSatish Balay     nonzerorow += (n>0);
7577fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
75849b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
75949b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
76049b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
761831a3094SHong Zhang      v += 9; jmin++;
7627fbae186SHong Zhang     }
763831a3094SHong Zhang     for (j=jmin; j<n; j++) {
76449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
76549b5e25fSSatish Balay       cval       = ib[j]*3;
76649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
76749b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
76849b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
76949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
77049b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
77149b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
77249b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
77349b5e25fSSatish Balay       v  += 9;
77449b5e25fSSatish Balay     }
77549b5e25fSSatish Balay     xb +=3; ai++;
77649b5e25fSSatish Balay   }
77749b5e25fSSatish Balay 
7781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
779bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
78049b5e25fSSatish Balay 
781dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
78249b5e25fSSatish Balay   PetscFunctionReturn(0);
78349b5e25fSSatish Balay }
78449b5e25fSSatish Balay 
7854a2ae208SSatish Balay #undef __FUNCT__
7864a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
787dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
78849b5e25fSSatish Balay {
78949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
790bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
79149b5e25fSSatish Balay   MatScalar      *v;
7926849ba73SBarry Smith   PetscErrorCode ierr;
79313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
79498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
79549b5e25fSSatish Balay 
79649b5e25fSSatish Balay   PetscFunctionBegin;
7976a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
80049b5e25fSSatish Balay 
80149b5e25fSSatish Balay   v     = a->a;
80249b5e25fSSatish Balay   xb = x;
80349b5e25fSSatish Balay 
80449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
80549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
80649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
80749b5e25fSSatish Balay     ib = aj + *ai;
808831a3094SHong Zhang     jmin = 0;
80998c9bda7SSatish Balay     nonzerorow += (n>0);
8107fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
81149b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
81249b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
81349b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
81449b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
815831a3094SHong Zhang       v += 16; jmin++;
8167fbae186SHong Zhang     }
817831a3094SHong Zhang     for (j=jmin; j<n; j++) {
81849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
81949b5e25fSSatish Balay       cval       = ib[j]*4;
82049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
82149b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
82249b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
82349b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
82449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
82549b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
82649b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
82749b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
82849b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
82949b5e25fSSatish Balay       v  += 16;
83049b5e25fSSatish Balay     }
83149b5e25fSSatish Balay     xb +=4; ai++;
83249b5e25fSSatish Balay   }
83349b5e25fSSatish Balay 
8341ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
835bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
83649b5e25fSSatish Balay 
837dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
83849b5e25fSSatish Balay   PetscFunctionReturn(0);
83949b5e25fSSatish Balay }
84049b5e25fSSatish Balay 
8414a2ae208SSatish Balay #undef __FUNCT__
8424a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
843dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
84449b5e25fSSatish Balay {
84549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
846bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
84749b5e25fSSatish Balay   MatScalar      *v;
8486849ba73SBarry Smith   PetscErrorCode ierr;
84913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
85098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
85149b5e25fSSatish Balay 
85249b5e25fSSatish Balay   PetscFunctionBegin;
8536a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8541ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8551ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
85649b5e25fSSatish Balay 
85749b5e25fSSatish Balay   v     = a->a;
85849b5e25fSSatish Balay   xb = x;
85949b5e25fSSatish Balay 
86049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
86149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
86249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
86349b5e25fSSatish Balay     ib = aj + *ai;
864831a3094SHong Zhang     jmin = 0;
86598c9bda7SSatish Balay     nonzerorow += (n>0);
8667fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
86749b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
86849b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
86949b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
87049b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
87149b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
872831a3094SHong Zhang       v += 25; jmin++;
8737fbae186SHong Zhang     }
874831a3094SHong Zhang     for (j=jmin; j<n; j++) {
87549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
87649b5e25fSSatish Balay       cval       = ib[j]*5;
87749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
87849b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
87949b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
88049b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
88149b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
88249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
88349b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
88449b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
88549b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
88649b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
88749b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
88849b5e25fSSatish Balay       v  += 25;
88949b5e25fSSatish Balay     }
89049b5e25fSSatish Balay     xb +=5; ai++;
89149b5e25fSSatish Balay   }
89249b5e25fSSatish Balay 
8931ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
894bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
89549b5e25fSSatish Balay 
896dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
89749b5e25fSSatish Balay   PetscFunctionReturn(0);
89849b5e25fSSatish Balay }
8994a2ae208SSatish Balay #undef __FUNCT__
9004a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
901dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
90249b5e25fSSatish Balay {
90349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
904bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
90549b5e25fSSatish Balay   MatScalar      *v;
9066849ba73SBarry Smith   PetscErrorCode ierr;
90713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
90898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
90949b5e25fSSatish Balay 
91049b5e25fSSatish Balay   PetscFunctionBegin;
9116a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9121ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9131ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
91449b5e25fSSatish Balay 
91549b5e25fSSatish Balay   v     = a->a;
91649b5e25fSSatish Balay   xb = x;
91749b5e25fSSatish Balay 
91849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
91949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
92049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
92149b5e25fSSatish Balay     ib = aj + *ai;
922831a3094SHong Zhang     jmin = 0;
92398c9bda7SSatish Balay     nonzerorow += (n>0);
9247fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
92549b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
92649b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
92749b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
92849b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
92949b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
93049b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
931831a3094SHong Zhang       v += 36; jmin++;
9327fbae186SHong Zhang     }
933831a3094SHong Zhang     for (j=jmin; j<n; j++) {
93449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
93549b5e25fSSatish Balay       cval       = ib[j]*6;
93649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
93749b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
93849b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
93949b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
94049b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
94149b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
94249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
94349b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
94449b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
94549b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
94649b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
94749b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
94849b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
94949b5e25fSSatish Balay       v  += 36;
95049b5e25fSSatish Balay     }
95149b5e25fSSatish Balay     xb +=6; ai++;
95249b5e25fSSatish Balay   }
95349b5e25fSSatish Balay 
9541ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
955bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
95649b5e25fSSatish Balay 
957dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
95849b5e25fSSatish Balay   PetscFunctionReturn(0);
95949b5e25fSSatish Balay }
96049b5e25fSSatish Balay 
9614a2ae208SSatish Balay #undef __FUNCT__
9624a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
963dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
96449b5e25fSSatish Balay {
96549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
966bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
96749b5e25fSSatish Balay   MatScalar      *v;
9686849ba73SBarry Smith   PetscErrorCode ierr;
96913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
97098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
97149b5e25fSSatish Balay 
97249b5e25fSSatish Balay   PetscFunctionBegin;
9736a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9741ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9751ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
97649b5e25fSSatish Balay 
97749b5e25fSSatish Balay   v     = a->a;
97849b5e25fSSatish Balay   xb = x;
97949b5e25fSSatish Balay 
98049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
98149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
98249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
98349b5e25fSSatish Balay     ib = aj + *ai;
984831a3094SHong Zhang     jmin = 0;
98598c9bda7SSatish Balay     nonzerorow += (n>0);
9867fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
98749b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
98849b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
98949b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
99049b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
99149b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
99249b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
99349b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
994831a3094SHong Zhang       v += 49; jmin++;
9957fbae186SHong Zhang     }
996831a3094SHong Zhang     for (j=jmin; j<n; j++) {
99749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
99849b5e25fSSatish Balay       cval       = ib[j]*7;
99949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
100049b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
100149b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
100249b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
100349b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
100449b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
100549b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
100649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
100749b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
100849b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
100949b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
101049b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
101149b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
101249b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
101349b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
101449b5e25fSSatish Balay       v  += 49;
101549b5e25fSSatish Balay     }
101649b5e25fSSatish Balay     xb +=7; ai++;
101749b5e25fSSatish Balay   }
101849b5e25fSSatish Balay 
10191ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1020bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
102149b5e25fSSatish Balay 
1022dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
102349b5e25fSSatish Balay   PetscFunctionReturn(0);
102449b5e25fSSatish Balay }
102549b5e25fSSatish Balay 
10264a2ae208SSatish Balay #undef __FUNCT__
10274a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1028dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
102949b5e25fSSatish Balay {
103049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1031bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1032066653e3SSatish Balay   MatScalar      *v;
1033dfbe8321SBarry Smith   PetscErrorCode ierr;
1034d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
103598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
103649b5e25fSSatish Balay 
103749b5e25fSSatish Balay   PetscFunctionBegin;
10386a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10391ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10401ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
104149b5e25fSSatish Balay 
104249b5e25fSSatish Balay   aj   = a->j;
104349b5e25fSSatish Balay   v    = a->a;
104449b5e25fSSatish Balay   ii   = a->i;
104549b5e25fSSatish Balay 
104649b5e25fSSatish Balay   if (!a->mult_work) {
1047d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
104849b5e25fSSatish Balay   }
104949b5e25fSSatish Balay   work = a->mult_work;
105049b5e25fSSatish Balay 
105149b5e25fSSatish Balay 
105249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
105349b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
105449b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
105598c9bda7SSatish Balay     nonzerorow += (n>0);
105649b5e25fSSatish Balay 
105749b5e25fSSatish Balay     /* upper triangular part */
105849b5e25fSSatish Balay     for (j=0; j<n; j++) {
105949b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
106049b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
106149b5e25fSSatish Balay       workt += bs;
106249b5e25fSSatish Balay     }
106349b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
106449b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
106549b5e25fSSatish Balay 
106649b5e25fSSatish Balay     /* strict lower triangular part */
1067831a3094SHong Zhang     idx = aj+ii[0];
1068831a3094SHong Zhang     if (*idx == i){
106996b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1070831a3094SHong Zhang     }
107149b5e25fSSatish Balay     if (ncols > 0){
107249b5e25fSSatish Balay       workt = work;
107387828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1074831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1075831a3094SHong Zhang       for (j=0; j<n; j++) {
1076831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
107749b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
107849b5e25fSSatish Balay         workt += bs;
107949b5e25fSSatish Balay       }
108049b5e25fSSatish Balay     }
108149b5e25fSSatish Balay 
108249b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
108349b5e25fSSatish Balay   }
108449b5e25fSSatish Balay 
10851ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1086bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
108749b5e25fSSatish Balay 
1088dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
108949b5e25fSSatish Balay   PetscFunctionReturn(0);
109049b5e25fSSatish Balay }
109149b5e25fSSatish Balay 
10924a2ae208SSatish Balay #undef __FUNCT__
10934a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1094f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
109549b5e25fSSatish Balay {
109649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1097f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1098efee365bSSatish Balay   PetscErrorCode ierr;
10990805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
110049b5e25fSSatish Balay 
110149b5e25fSSatish Balay   PetscFunctionBegin;
1102f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1103efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
110449b5e25fSSatish Balay   PetscFunctionReturn(0);
110549b5e25fSSatish Balay }
110649b5e25fSSatish Balay 
11074a2ae208SSatish Balay #undef __FUNCT__
11084a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1109dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
111049b5e25fSSatish Balay {
111149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111249b5e25fSSatish Balay   MatScalar      *v = a->a;
111349b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1114d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11156849ba73SBarry Smith   PetscErrorCode ierr;
111613f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
111749b5e25fSSatish Balay 
111849b5e25fSSatish Balay   PetscFunctionBegin;
111949b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
112049b5e25fSSatish Balay     for (k=0; k<mbs; k++){
112149b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1122831a3094SHong Zhang       col  = aj + jmin;
1123831a3094SHong Zhang       if (*col == k){         /* diagonal block */
112449b5e25fSSatish Balay         for (i=0; i<bs2; i++){
112549b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
112649b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
112749b5e25fSSatish Balay #else
112849b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
112949b5e25fSSatish Balay #endif
113049b5e25fSSatish Balay         }
1131831a3094SHong Zhang         jmin++;
1132831a3094SHong Zhang       }
1133831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
113449b5e25fSSatish Balay         for (i=0; i<bs2; i++){
113549b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
113649b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
113749b5e25fSSatish Balay #else
113849b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
113949b5e25fSSatish Balay #endif
114049b5e25fSSatish Balay         }
114149b5e25fSSatish Balay       }
114249b5e25fSSatish Balay     }
114349b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11440b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
114574ed9c26SBarry Smith     ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
11460b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11470b8dc8d2SHong Zhang     il[0] = 0;
114849b5e25fSSatish Balay 
114949b5e25fSSatish Balay     *norm = 0.0;
115049b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
115149b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
115249b5e25fSSatish Balay       /*-- col sum --*/
115349b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1154831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1155831a3094SHong Zhang                   at step k */
115649b5e25fSSatish Balay       while (i<mbs){
115749b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
115849b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
115949b5e25fSSatish Balay         for (j=0; j<bs; j++){
116049b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
116149b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
116249b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
116349b5e25fSSatish Balay           }
116449b5e25fSSatish Balay         }
116549b5e25fSSatish Balay         /* update il, jl */
1166831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1167831a3094SHong Zhang         jmax = a->i[i+1];
116849b5e25fSSatish Balay         if (jmin < jmax){
116949b5e25fSSatish Balay           il[i] = jmin;
117049b5e25fSSatish Balay           j   = a->j[jmin];
117149b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
117249b5e25fSSatish Balay         }
117349b5e25fSSatish Balay         i = nexti;
117449b5e25fSSatish Balay       }
117549b5e25fSSatish Balay       /*-- row sum --*/
117649b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
117749b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
117849b5e25fSSatish Balay         for (j=0; j<bs; j++){
117949b5e25fSSatish Balay           v = a->a + i*bs2 + j;
118049b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
11810b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
118249b5e25fSSatish Balay           }
118349b5e25fSSatish Balay         }
118449b5e25fSSatish Balay       }
118549b5e25fSSatish Balay       /* add k_th block row to il, jl */
1186831a3094SHong Zhang       col = aj+jmin;
1187831a3094SHong Zhang       if (*col == k) jmin++;
118849b5e25fSSatish Balay       if (jmin < jmax){
118949b5e25fSSatish Balay         il[k] = jmin;
11900b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
119149b5e25fSSatish Balay       }
119249b5e25fSSatish Balay       for (j=0; j<bs; j++){
119349b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
119449b5e25fSSatish Balay       }
119549b5e25fSSatish Balay     }
119674ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
119749b5e25fSSatish Balay   } else {
1198e32f2f54SBarry Smith     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
119949b5e25fSSatish Balay   }
120049b5e25fSSatish Balay   PetscFunctionReturn(0);
120149b5e25fSSatish Balay }
120249b5e25fSSatish Balay 
12034a2ae208SSatish Balay #undef __FUNCT__
12044a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1205dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
120649b5e25fSSatish Balay {
120749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1208dfbe8321SBarry Smith   PetscErrorCode ierr;
120949b5e25fSSatish Balay 
121049b5e25fSSatish Balay   PetscFunctionBegin;
121149b5e25fSSatish Balay 
121249b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1213d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1214ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1215ef511fbeSHong Zhang     PetscFunctionReturn(0);
121649b5e25fSSatish Balay   }
121749b5e25fSSatish Balay 
121849b5e25fSSatish Balay   /* if the a->i are the same */
121913f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1220abc0a331SBarry Smith   if (!*flg) {
122149b5e25fSSatish Balay     PetscFunctionReturn(0);
122249b5e25fSSatish Balay   }
122349b5e25fSSatish Balay 
122449b5e25fSSatish Balay   /* if a->j are the same */
122513f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1226abc0a331SBarry Smith   if (!*flg) {
122749b5e25fSSatish Balay     PetscFunctionReturn(0);
122849b5e25fSSatish Balay   }
122949b5e25fSSatish Balay   /* if a->a are the same */
1230d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1231935af2e7SHong Zhang   PetscFunctionReturn(0);
123249b5e25fSSatish Balay }
123349b5e25fSSatish Balay 
12344a2ae208SSatish Balay #undef __FUNCT__
12354a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1236dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
123749b5e25fSSatish Balay {
123849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1239dfbe8321SBarry Smith   PetscErrorCode ierr;
124013f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
124187828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
124249b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
124349b5e25fSSatish Balay 
124449b5e25fSSatish Balay   PetscFunctionBegin;
1245d0f46423SBarry Smith   bs   = A->rmap->bs;
1246e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
124782799104SHong Zhang 
124849b5e25fSSatish Balay   aa   = a->a;
124949b5e25fSSatish Balay   ai   = a->i;
125049b5e25fSSatish Balay   aj   = a->j;
125149b5e25fSSatish Balay   ambs = a->mbs;
125249b5e25fSSatish Balay   bs2  = a->bs2;
125349b5e25fSSatish Balay 
12542dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12551ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
125649b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1257e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
125849b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
125949b5e25fSSatish Balay     j=ai[i];
126049b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
126149b5e25fSSatish Balay       row  = i*bs;
126249b5e25fSSatish Balay       aa_j = aa + j*bs2;
1263d5f3da31SBarry Smith       if (A->factortype && bs==1){
126482799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
126582799104SHong Zhang       } else {
126649b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
126749b5e25fSSatish Balay       }
126849b5e25fSSatish Balay     }
126982799104SHong Zhang   }
127082799104SHong Zhang 
12711ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
127249b5e25fSSatish Balay   PetscFunctionReturn(0);
127349b5e25fSSatish Balay }
127449b5e25fSSatish Balay 
12754a2ae208SSatish Balay #undef __FUNCT__
12764a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1277dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
127849b5e25fSSatish Balay {
127949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
12805e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
128149b5e25fSSatish Balay   MatScalar      *aa,*v;
1282dfbe8321SBarry Smith   PetscErrorCode ierr;
12835e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1284b3bf805bSHong Zhang   PetscTruth     flg;
128549b5e25fSSatish Balay 
128649b5e25fSSatish Balay   PetscFunctionBegin;
1287b3bf805bSHong Zhang   if (ll != rr){
1288b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1289e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1290b3bf805bSHong Zhang   }
1291b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
129249b5e25fSSatish Balay   ai  = a->i;
129349b5e25fSSatish Balay   aj  = a->j;
129449b5e25fSSatish Balay   aa  = a->a;
1295d0f46423SBarry Smith   m   = A->rmap->N;
1296d0f46423SBarry Smith   bs  = A->rmap->bs;
129749b5e25fSSatish Balay   mbs = a->mbs;
129849b5e25fSSatish Balay   bs2 = a->bs2;
129949b5e25fSSatish Balay 
13001ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
130149b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1302e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
130349b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
130449b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
130549b5e25fSSatish Balay     li = l + i*bs;
130649b5e25fSSatish Balay     v  = aa + bs2*ai[i];
130749b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
130849b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13095e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
131049b5e25fSSatish Balay         x = ri[k];
131149b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
131249b5e25fSSatish Balay       }
131349b5e25fSSatish Balay     }
131449b5e25fSSatish Balay   }
13151ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1316dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
131749b5e25fSSatish Balay   PetscFunctionReturn(0);
131849b5e25fSSatish Balay }
131949b5e25fSSatish Balay 
13204a2ae208SSatish Balay #undef __FUNCT__
13214a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1322dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
132349b5e25fSSatish Balay {
132449b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
132549b5e25fSSatish Balay 
132649b5e25fSSatish Balay   PetscFunctionBegin;
132749b5e25fSSatish Balay   info->block_size     = a->bs2;
1328*ceed8ce5SJed Brown   info->nz_allocated   = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */
13296c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
133049b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
133149b5e25fSSatish Balay   info->assemblies   = A->num_ass;
13328e58a170SBarry Smith   info->mallocs      = A->info.mallocs;
13337adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1334d5f3da31SBarry Smith   if (A->factortype) {
133549b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
133649b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
133749b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
133849b5e25fSSatish Balay   } else {
133949b5e25fSSatish Balay     info->fill_ratio_given  = 0;
134049b5e25fSSatish Balay     info->fill_ratio_needed = 0;
134149b5e25fSSatish Balay     info->factor_mallocs    = 0;
134249b5e25fSSatish Balay   }
134349b5e25fSSatish Balay   PetscFunctionReturn(0);
134449b5e25fSSatish Balay }
134549b5e25fSSatish Balay 
134649b5e25fSSatish Balay 
13474a2ae208SSatish Balay #undef __FUNCT__
13484a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1349dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
135049b5e25fSSatish Balay {
135149b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1352dfbe8321SBarry Smith   PetscErrorCode ierr;
135349b5e25fSSatish Balay 
135449b5e25fSSatish Balay   PetscFunctionBegin;
135549b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
135649b5e25fSSatish Balay   PetscFunctionReturn(0);
135749b5e25fSSatish Balay }
1358dc354874SHong Zhang 
13594a2ae208SSatish Balay #undef __FUNCT__
1360985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1361985db425SBarry Smith /*
1362985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1363985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1364985db425SBarry Smith */
1365985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1366dc354874SHong Zhang {
1367dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1368dfbe8321SBarry Smith   PetscErrorCode ierr;
136913f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1370c3fca9a7SHong Zhang   PetscReal      atmp;
1371273d9f13SBarry Smith   MatScalar      *aa;
1372985db425SBarry Smith   PetscScalar    *x;
137313f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1374dc354874SHong Zhang 
1375dc354874SHong Zhang   PetscFunctionBegin;
1376e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1377e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1378d0f46423SBarry Smith   bs   = A->rmap->bs;
1379dc354874SHong Zhang   aa   = a->a;
1380dc354874SHong Zhang   ai   = a->i;
1381dc354874SHong Zhang   aj   = a->j;
138244117c81SHong Zhang   mbs = a->mbs;
1383dc354874SHong Zhang 
1384985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
13851ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1386dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1387e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
138844117c81SHong Zhang   for (i=0; i<mbs; i++) {
1389d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1390d0f6400bSHong Zhang     brow  = bs*i;
139144117c81SHong Zhang     for (j=0; j<ncols; j++){
1392d0f6400bSHong Zhang       bcol = bs*(*aj);
139344117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1394d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
139544117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1396d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1397d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1398c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1399c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
140044117c81SHong Zhang         }
140144117c81SHong Zhang       }
1402d0f6400bSHong Zhang       aj++;
1403dc354874SHong Zhang     }
1404dc354874SHong Zhang   }
14051ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1406dc354874SHong Zhang   PetscFunctionReturn(0);
1407dc354874SHong Zhang }
1408