xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision db41cccff32efbb513e6f5a641add0fbc314552d)
149b5e25fSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
3c6db04a5SJed Brown #include <../src/mat/blockinvert.h>
4c6db04a5SJed Brown #include <petscbt.h>
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
6c6db04a5SJed Brown #include <petscblaslapack.h>
749b5e25fSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1149b5e25fSSatish Balay {
125eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
136849ba73SBarry Smith   PetscErrorCode ierr;
145d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
155d0c19d7SBarry Smith   const PetscInt *idx;
16*db41cccfSHong Zhang   PetscBT        table_out,table_in;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19e32f2f54SBarry Smith   if (ov < 0)  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23d0f46423SBarry Smith   bs  = A->rmap->bs;
24*db41cccfSHong Zhang   ierr = PetscBTCreate(mbs,table_out);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
27*db41cccfSHong Zhang   ierr = PetscBTCreate(mbs,table_in);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31*db41cccfSHong Zhang     ierr = PetscBTMemzero(mbs,table_out);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37*db41cccfSHong Zhang     /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n ; ++j){
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42*db41cccfSHong Zhang       if(!PetscBTLookupSet(table_out,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
48d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
52*db41cccfSHong Zhang       /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53*db41cccfSHong Zhang       ierr = PetscBTMemzero(mbs,table_in);CHKERRQ(ierr);
54*db41cccfSHong Zhang       for (l=k; l<isz; l++) { ierr = PetscBTSet(table_in,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59*db41cccfSHong Zhang         if (PetscBTLookup(table_in,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end ; l++){
61d94109b8SHong Zhang             bcol = aj[l];
62*db41cccfSHong Zhang             if (!PetscBTLookupSet(table_out,bcol)) {nidx[isz++] = bcol;}
63d94109b8SHong Zhang           }
64d94109b8SHong Zhang           k++;
65d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67d94109b8SHong Zhang           for (l = start; l<end ; l++){
68d94109b8SHong Zhang             bcol = aj[l];
69dbe03f88SHong Zhang             if (bcol > bcol_max) break;
70*db41cccfSHong Zhang             if (PetscBTLookup(table_in,bcol)){
71*db41cccfSHong Zhang               if (!PetscBTLookupSet(table_out,brow)) {nidx[isz++] = brow;}
72d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
73d94109b8SHong Zhang             }
74d94109b8SHong Zhang           }
75d94109b8SHong Zhang         }
76d94109b8SHong Zhang       }
77d94109b8SHong Zhang     } /* for each overlap */
78d94109b8SHong Zhang 
79d94109b8SHong Zhang     /* expand the Index Set */
80d94109b8SHong Zhang     for (j=0; j<isz; j++) {
81d94109b8SHong Zhang       for (k=0; k<bs; k++)
82d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
83d94109b8SHong Zhang     }
8470b3c8c7SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
85d94109b8SHong Zhang   }
86*db41cccfSHong Zhang   ierr = PetscBTDestroy(table_out);CHKERRQ(ierr);
87d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
89*db41cccfSHong Zhang   ierr = PetscBTDestroy(table_in);CHKERRQ(ierr);
905eee224dSHong Zhang   PetscFunctionReturn(0);
9149b5e25fSSatish Balay }
9249b5e25fSSatish Balay 
934a2ae208SSatish Balay #undef __FUNCT__
944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
954aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9649b5e25fSSatish Balay {
9749b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
986849ba73SBarry Smith   PetscErrorCode ierr;
9913f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10013f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1015d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1025d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10349b5e25fSSatish Balay   MatScalar      *mat_a;
10449b5e25fSSatish Balay   Mat            C;
105ace3abfcSBarry Smith   PetscBool      flag,sorted;
10649b5e25fSSatish Balay 
10749b5e25fSSatish Balay   PetscFunctionBegin;
108e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
10914ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
110e32f2f54SBarry Smith   if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11149b5e25fSSatish Balay 
11249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11449b5e25fSSatish Balay 
11574ed9c26SBarry Smith   ierr  = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11674ed9c26SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11749b5e25fSSatish Balay   ssmap = smap;
11813f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11949b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12049b5e25fSSatish Balay   /* determine lens of each row */
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12249b5e25fSSatish Balay     kstart  = ai[irow[i]];
12349b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12449b5e25fSSatish Balay     lens[i] = 0;
12549b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12649b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12749b5e25fSSatish Balay           lens[i]++;
12849b5e25fSSatish Balay         }
12949b5e25fSSatish Balay       }
13049b5e25fSSatish Balay     }
13149b5e25fSSatish Balay   /* Create and fill new matrix */
13249b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13349b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13449b5e25fSSatish Balay 
135e32f2f54SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13613f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
137e7e72b3dSBarry Smith     if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13813f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
13949b5e25fSSatish Balay     C = *B;
14049b5e25fSSatish Balay   } else {
1417adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
142f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1437adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
144ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14549b5e25fSSatish Balay   }
14649b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14749b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
14849b5e25fSSatish Balay     row    = irow[i];
14949b5e25fSSatish Balay     kstart = ai[row];
15049b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15149b5e25fSSatish Balay     mat_i  = c->i[i];
15249b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15349b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15449b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15549b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15649b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15749b5e25fSSatish Balay         *mat_j++ = tcol - 1;
15849b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
15949b5e25fSSatish Balay         mat_a   += bs2;
16049b5e25fSSatish Balay         (*mat_ilen)++;
16149b5e25fSSatish Balay       }
16249b5e25fSSatish Balay     }
16349b5e25fSSatish Balay   }
16449b5e25fSSatish Balay 
16549b5e25fSSatish Balay   /* Free work space */
16649b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16749b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
16849b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17049b5e25fSSatish Balay 
17149b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17249b5e25fSSatish Balay   *B = C;
17349b5e25fSSatish Balay   PetscFunctionReturn(0);
17449b5e25fSSatish Balay }
17549b5e25fSSatish Balay 
1764a2ae208SSatish Balay #undef __FUNCT__
1774a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1784aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
17949b5e25fSSatish Balay {
18049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18149b5e25fSSatish Balay   IS             is1;
1826849ba73SBarry Smith   PetscErrorCode ierr;
1835d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1845d0c19d7SBarry Smith   const PetscInt *irow;
18549b5e25fSSatish Balay 
18649b5e25fSSatish Balay   PetscFunctionBegin;
187e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
18849b5e25fSSatish Balay 
18949b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19049b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19149b5e25fSSatish Balay 
19249b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19349b5e25fSSatish Balay    and form the IS with compressed IS */
19474ed9c26SBarry Smith   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
19574ed9c26SBarry Smith   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
19649b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19749b5e25fSSatish Balay 
19849b5e25fSSatish Balay   count = 0;
19949b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
200e32f2f54SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20149b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20249b5e25fSSatish Balay   }
20349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20470b3c8c7SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
20574ed9c26SBarry Smith   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
20649b5e25fSSatish Balay 
2074aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
20874ed9c26SBarry Smith   ierr = ISDestroy(is1);CHKERRQ(ierr);
20949b5e25fSSatish Balay   PetscFunctionReturn(0);
21049b5e25fSSatish Balay }
21149b5e25fSSatish Balay 
2124a2ae208SSatish Balay #undef __FUNCT__
2134a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21413f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21549b5e25fSSatish Balay {
2166849ba73SBarry Smith   PetscErrorCode ierr;
21713f74950SBarry Smith   PetscInt       i;
21849b5e25fSSatish Balay 
21949b5e25fSSatish Balay   PetscFunctionBegin;
22049b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22182502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22249b5e25fSSatish Balay   }
22349b5e25fSSatish Balay 
22449b5e25fSSatish Balay   for (i=0; i<n; i++) {
2254aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
22649b5e25fSSatish Balay   }
22749b5e25fSSatish Balay   PetscFunctionReturn(0);
22849b5e25fSSatish Balay }
22949b5e25fSSatish Balay 
23049b5e25fSSatish Balay /* -------------------------------------------------------*/
23149b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23249b5e25fSSatish Balay /* -------------------------------------------------------*/
23349b5e25fSSatish Balay 
2344a2ae208SSatish Balay #undef __FUNCT__
2354a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
236dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
23749b5e25fSSatish Balay {
23849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
23987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
24049b5e25fSSatish Balay   MatScalar      *v;
2416849ba73SBarry Smith   PetscErrorCode ierr;
24213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24449b5e25fSSatish Balay 
24549b5e25fSSatish Balay   PetscFunctionBegin;
2462dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2471ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
24949b5e25fSSatish Balay 
25049b5e25fSSatish Balay   v     = a->a;
25149b5e25fSSatish Balay   xb = x;
25249b5e25fSSatish Balay 
25349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
25549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
25649b5e25fSSatish Balay     ib = aj + *ai;
257831a3094SHong Zhang     jmin = 0;
25898c9bda7SSatish Balay     nonzerorow += (n>0);
2597fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
26049b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
26149b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
262831a3094SHong Zhang       v += 4; jmin++;
2637fbae186SHong Zhang     }
264444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
265444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
266831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
26849b5e25fSSatish Balay       cval       = ib[j]*2;
26949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
27049b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
27149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
27249b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
27349b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
27449b5e25fSSatish Balay       v  += 4;
27549b5e25fSSatish Balay     }
27649b5e25fSSatish Balay     xb +=2; ai++;
27749b5e25fSSatish Balay   }
27849b5e25fSSatish Balay 
2791ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2801ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
281dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
28249b5e25fSSatish Balay   PetscFunctionReturn(0);
28349b5e25fSSatish Balay }
28449b5e25fSSatish Balay 
2854a2ae208SSatish Balay #undef __FUNCT__
2864a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
287dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
28849b5e25fSSatish Balay {
28949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
29087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
29149b5e25fSSatish Balay   MatScalar      *v;
2926849ba73SBarry Smith   PetscErrorCode ierr;
29313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29549b5e25fSSatish Balay 
29649b5e25fSSatish Balay   PetscFunctionBegin;
2972dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
30049b5e25fSSatish Balay 
30149b5e25fSSatish Balay   v    = a->a;
30249b5e25fSSatish Balay   xb   = x;
30349b5e25fSSatish Balay 
30449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
30749b5e25fSSatish Balay     ib = aj + *ai;
308831a3094SHong Zhang     jmin = 0;
30998c9bda7SSatish Balay     nonzerorow += (n>0);
3107fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
31149b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
31249b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
31349b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
314831a3094SHong Zhang       v += 9; jmin++;
3157fbae186SHong Zhang     }
316444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
317444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
318831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
32049b5e25fSSatish Balay       cval       = ib[j]*3;
32149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
32249b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
32349b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
32449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
32649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
32749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
32849b5e25fSSatish Balay       v  += 9;
32949b5e25fSSatish Balay     }
33049b5e25fSSatish Balay     xb +=3; ai++;
33149b5e25fSSatish Balay   }
33249b5e25fSSatish Balay 
3331ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3341ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
335dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33649b5e25fSSatish Balay   PetscFunctionReturn(0);
33749b5e25fSSatish Balay }
33849b5e25fSSatish Balay 
3394a2ae208SSatish Balay #undef __FUNCT__
3404a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
341dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
34249b5e25fSSatish Balay {
34349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
34487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
34549b5e25fSSatish Balay   MatScalar      *v;
3466849ba73SBarry Smith   PetscErrorCode ierr;
34713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
34949b5e25fSSatish Balay 
35049b5e25fSSatish Balay   PetscFunctionBegin;
3512dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3521ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3531ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35449b5e25fSSatish Balay 
35549b5e25fSSatish Balay   v     = a->a;
35649b5e25fSSatish Balay   xb = x;
35749b5e25fSSatish Balay 
35849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
35949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
36049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
36149b5e25fSSatish Balay     ib = aj + *ai;
362831a3094SHong Zhang     jmin = 0;
36398c9bda7SSatish Balay     nonzerorow += (n>0);
3647fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
36549b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36649b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
36749b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
36849b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
369831a3094SHong Zhang       v += 16; jmin++;
3707fbae186SHong Zhang     }
371444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
372444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
373831a3094SHong Zhang     for (j=jmin; j<n; j++) {
37449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37549b5e25fSSatish Balay       cval       = ib[j]*4;
37649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
37749b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
37849b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
37949b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
38049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
38149b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
38249b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
38349b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
38449b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38549b5e25fSSatish Balay       v  += 16;
38649b5e25fSSatish Balay     }
38749b5e25fSSatish Balay     xb +=4; ai++;
38849b5e25fSSatish Balay   }
38949b5e25fSSatish Balay 
3901ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3911ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
392dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
39349b5e25fSSatish Balay   PetscFunctionReturn(0);
39449b5e25fSSatish Balay }
39549b5e25fSSatish Balay 
3964a2ae208SSatish Balay #undef __FUNCT__
3974a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
398dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
39949b5e25fSSatish Balay {
40049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
40187828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
40249b5e25fSSatish Balay   MatScalar      *v;
4036849ba73SBarry Smith   PetscErrorCode ierr;
40413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
40598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
40649b5e25fSSatish Balay 
40749b5e25fSSatish Balay   PetscFunctionBegin;
4082dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4091ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4101ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
41149b5e25fSSatish Balay 
41249b5e25fSSatish Balay   v     = a->a;
41349b5e25fSSatish Balay   xb = x;
41449b5e25fSSatish Balay 
41549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
41749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
41849b5e25fSSatish Balay     ib = aj + *ai;
419831a3094SHong Zhang     jmin = 0;
42098c9bda7SSatish Balay     nonzerorow += (n>0);
4217fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
42249b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
42349b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
42449b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
42549b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42649b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
427831a3094SHong Zhang       v += 25; jmin++;
4287fbae186SHong Zhang     }
429444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
430444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
431831a3094SHong Zhang     for (j=jmin; j<n; j++) {
43249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
43349b5e25fSSatish Balay       cval       = ib[j]*5;
43449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
43549b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
43649b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
43749b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
43849b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
43949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
44049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
44149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
44249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
44349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
44449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
44549b5e25fSSatish Balay       v  += 25;
44649b5e25fSSatish Balay     }
44749b5e25fSSatish Balay     xb +=5; ai++;
44849b5e25fSSatish Balay   }
44949b5e25fSSatish Balay 
4501ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4511ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
452dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
45349b5e25fSSatish Balay   PetscFunctionReturn(0);
45449b5e25fSSatish Balay }
45549b5e25fSSatish Balay 
45649b5e25fSSatish Balay 
4574a2ae208SSatish Balay #undef __FUNCT__
4584a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
459dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
46049b5e25fSSatish Balay {
46149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
46287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
46349b5e25fSSatish Balay   MatScalar      *v;
4646849ba73SBarry Smith   PetscErrorCode ierr;
46513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
46698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
46749b5e25fSSatish Balay 
46849b5e25fSSatish Balay   PetscFunctionBegin;
4692dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4701ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
47249b5e25fSSatish Balay 
47349b5e25fSSatish Balay   v     = a->a;
47449b5e25fSSatish Balay   xb = x;
47549b5e25fSSatish Balay 
47649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
47849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
47949b5e25fSSatish Balay     ib = aj + *ai;
480831a3094SHong Zhang     jmin = 0;
48198c9bda7SSatish Balay     nonzerorow += (n>0);
4827fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
48349b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
48449b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
48549b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
48649b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
48749b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
48849b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
489831a3094SHong Zhang       v += 36; jmin++;
4907fbae186SHong Zhang     }
491444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
492444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
493831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49549b5e25fSSatish Balay       cval       = ib[j]*6;
49649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
49749b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
49849b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
49949b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
50049b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
50149b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
50249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50349b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
50449b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
50549b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
50649b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
50749b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
50849b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
50949b5e25fSSatish Balay       v  += 36;
51049b5e25fSSatish Balay     }
51149b5e25fSSatish Balay     xb +=6; ai++;
51249b5e25fSSatish Balay   }
51349b5e25fSSatish Balay 
5141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5151ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
516dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
51749b5e25fSSatish Balay   PetscFunctionReturn(0);
51849b5e25fSSatish Balay }
5194a2ae208SSatish Balay #undef __FUNCT__
5204a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
521dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
52249b5e25fSSatish Balay {
52349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
52487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
52549b5e25fSSatish Balay   MatScalar      *v;
5266849ba73SBarry Smith   PetscErrorCode ierr;
52713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
52898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
52949b5e25fSSatish Balay 
53049b5e25fSSatish Balay   PetscFunctionBegin;
5312dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5321ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
53449b5e25fSSatish Balay 
53549b5e25fSSatish Balay   v     = a->a;
53649b5e25fSSatish Balay   xb = x;
53749b5e25fSSatish Balay 
53849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
53949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
54049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
54149b5e25fSSatish Balay     ib = aj + *ai;
542831a3094SHong Zhang     jmin = 0;
54398c9bda7SSatish Balay     nonzerorow += (n>0);
5447fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
54549b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
54649b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
54749b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
54849b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
54949b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
55049b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
55149b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
552831a3094SHong Zhang       v += 49; jmin++;
5537fbae186SHong Zhang     }
554444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
555444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
556831a3094SHong Zhang     for (j=jmin; j<n; j++) {
55749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
55849b5e25fSSatish Balay       cval       = ib[j]*7;
55949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
56049b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
56149b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
56249b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
56349b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
56449b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
56549b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
56649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
56749b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
56849b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
56949b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
57049b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
57149b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
57249b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
57349b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
57449b5e25fSSatish Balay       v  += 49;
57549b5e25fSSatish Balay     }
57649b5e25fSSatish Balay     xb +=7; ai++;
57749b5e25fSSatish Balay   }
5781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
580dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
58149b5e25fSSatish Balay   PetscFunctionReturn(0);
58249b5e25fSSatish Balay }
58349b5e25fSSatish Balay 
58449b5e25fSSatish Balay /*
58549b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
58649b5e25fSSatish Balay */
5874a2ae208SSatish Balay #undef __FUNCT__
5884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
589dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
59049b5e25fSSatish Balay {
59149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
59287828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5930b60a74dSHong Zhang   MatScalar      *v;
594dfbe8321SBarry Smith   PetscErrorCode ierr;
595d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
59698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
59749b5e25fSSatish Balay 
59849b5e25fSSatish Balay   PetscFunctionBegin;
5992dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6001ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6011ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
60249b5e25fSSatish Balay 
60349b5e25fSSatish Balay   aj   = a->j;
60449b5e25fSSatish Balay   v    = a->a;
60549b5e25fSSatish Balay   ii   = a->i;
60649b5e25fSSatish Balay 
60749b5e25fSSatish Balay   if (!a->mult_work) {
608d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
60949b5e25fSSatish Balay   }
61049b5e25fSSatish Balay   work = a->mult_work;
61149b5e25fSSatish Balay 
61249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61349b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
61449b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
61598c9bda7SSatish Balay     nonzerorow += (n>0);
61649b5e25fSSatish Balay 
61749b5e25fSSatish Balay     /* upper triangular part */
61849b5e25fSSatish Balay     for (j=0; j<n; j++) {
61949b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
62049b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
62149b5e25fSSatish Balay       workt += bs;
62249b5e25fSSatish Balay     }
62349b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
62449b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
62549b5e25fSSatish Balay 
62649b5e25fSSatish Balay     /* strict lower triangular part */
627831a3094SHong Zhang     idx = aj+ii[0];
628831a3094SHong Zhang     if (*idx == i){
62996b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
630831a3094SHong Zhang     }
63196b9376eSHong Zhang 
63249b5e25fSSatish Balay     if (ncols > 0){
63349b5e25fSSatish Balay       workt = work;
63487828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
635831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
636831a3094SHong Zhang       for (j=0; j<n; j++) {
637831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
63849b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
63949b5e25fSSatish Balay         workt += bs;
64049b5e25fSSatish Balay       }
64149b5e25fSSatish Balay     }
64249b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
64349b5e25fSSatish Balay   }
64449b5e25fSSatish Balay 
6451ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6461ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
647dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
64849b5e25fSSatish Balay   PetscFunctionReturn(0);
64949b5e25fSSatish Balay }
65049b5e25fSSatish Balay 
6514a2ae208SSatish Balay #undef __FUNCT__
6524a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
653dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
65449b5e25fSSatish Balay {
65549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
656bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
65749b5e25fSSatish Balay   MatScalar      *v;
6586849ba73SBarry Smith   PetscErrorCode ierr;
65913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
66098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
66149b5e25fSSatish Balay 
66249b5e25fSSatish Balay   PetscFunctionBegin;
663343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
6641ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6651ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
66649b5e25fSSatish Balay   v  = a->a;
66749b5e25fSSatish Balay   xb = x;
66849b5e25fSSatish Balay 
66949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
67049b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
67149b5e25fSSatish Balay     x1 = xb[0];
67249b5e25fSSatish Balay     ib = aj + *ai;
673831a3094SHong Zhang     jmin = 0;
67498c9bda7SSatish Balay     nonzerorow += (n>0);
675831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
676831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
677831a3094SHong Zhang     }
678831a3094SHong Zhang     for (j=jmin; j<n; j++) {
67949b5e25fSSatish Balay       cval    = *ib;
68049b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
68149b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
68249b5e25fSSatish Balay     }
68349b5e25fSSatish Balay     xb++; ai++;
68449b5e25fSSatish Balay   }
68549b5e25fSSatish Balay 
6861ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68849b5e25fSSatish Balay 
689dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
69049b5e25fSSatish Balay   PetscFunctionReturn(0);
69149b5e25fSSatish Balay }
69249b5e25fSSatish Balay 
6934a2ae208SSatish Balay #undef __FUNCT__
6944a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
695dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
69649b5e25fSSatish Balay {
69749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
698bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
69949b5e25fSSatish Balay   MatScalar      *v;
7006849ba73SBarry Smith   PetscErrorCode ierr;
70113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
70298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
70349b5e25fSSatish Balay 
70449b5e25fSSatish Balay   PetscFunctionBegin;
705343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7061ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7071ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70849b5e25fSSatish Balay 
70949b5e25fSSatish Balay   v  = a->a;
71049b5e25fSSatish Balay   xb = x;
71149b5e25fSSatish Balay 
71249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
71349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
71449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
71549b5e25fSSatish Balay     ib = aj + *ai;
716831a3094SHong Zhang     jmin = 0;
71798c9bda7SSatish Balay     nonzerorow += (n>0);
7187fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
71949b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
72049b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
721831a3094SHong Zhang       v += 4; jmin++;
7227fbae186SHong Zhang     }
723444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
724444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
725831a3094SHong Zhang     for (j=jmin; j<n; j++) {
72649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
72749b5e25fSSatish Balay       cval       = ib[j]*2;
72849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
72949b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
73049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
73149b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
73249b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
73349b5e25fSSatish Balay       v  += 4;
73449b5e25fSSatish Balay     }
73549b5e25fSSatish Balay     xb +=2; ai++;
73649b5e25fSSatish Balay   }
7371ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
738bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
73949b5e25fSSatish Balay 
740dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
74149b5e25fSSatish Balay   PetscFunctionReturn(0);
74249b5e25fSSatish Balay }
74349b5e25fSSatish Balay 
7444a2ae208SSatish Balay #undef __FUNCT__
7454a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
746dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
74749b5e25fSSatish Balay {
74849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
749bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
75049b5e25fSSatish Balay   MatScalar      *v;
7516849ba73SBarry Smith   PetscErrorCode ierr;
75213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
75398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
75449b5e25fSSatish Balay 
75549b5e25fSSatish Balay   PetscFunctionBegin;
756343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
75949b5e25fSSatish Balay 
76049b5e25fSSatish Balay   v     = a->a;
76149b5e25fSSatish Balay   xb = x;
76249b5e25fSSatish Balay 
76349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
76449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
76549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
76649b5e25fSSatish Balay     ib = aj + *ai;
767831a3094SHong Zhang     jmin = 0;
76898c9bda7SSatish Balay     nonzerorow += (n>0);
7697fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
77049b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
77149b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
77249b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
773831a3094SHong Zhang      v += 9; jmin++;
7747fbae186SHong Zhang     }
775444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
776444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
777831a3094SHong Zhang     for (j=jmin; j<n; j++) {
77849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
77949b5e25fSSatish Balay       cval       = ib[j]*3;
78049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
78149b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
78249b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
78349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
78449b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
78549b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
78649b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
78749b5e25fSSatish Balay       v  += 9;
78849b5e25fSSatish Balay     }
78949b5e25fSSatish Balay     xb +=3; ai++;
79049b5e25fSSatish Balay   }
79149b5e25fSSatish Balay 
7921ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
793bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
79449b5e25fSSatish Balay 
795dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
79649b5e25fSSatish Balay   PetscFunctionReturn(0);
79749b5e25fSSatish Balay }
79849b5e25fSSatish Balay 
7994a2ae208SSatish Balay #undef __FUNCT__
8004a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
801dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
80249b5e25fSSatish Balay {
80349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
804bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
80549b5e25fSSatish Balay   MatScalar      *v;
8066849ba73SBarry Smith   PetscErrorCode ierr;
80713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
80898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
80949b5e25fSSatish Balay 
81049b5e25fSSatish Balay   PetscFunctionBegin;
811343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8121ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8131ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
81449b5e25fSSatish Balay 
81549b5e25fSSatish Balay   v     = a->a;
81649b5e25fSSatish Balay   xb = x;
81749b5e25fSSatish Balay 
81849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
81949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
82049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
82149b5e25fSSatish Balay     ib = aj + *ai;
822831a3094SHong Zhang     jmin = 0;
82398c9bda7SSatish Balay     nonzerorow += (n>0);
8247fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
82549b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
82649b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
82749b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
82849b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
829831a3094SHong Zhang       v += 16; jmin++;
8307fbae186SHong Zhang     }
831444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
832444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
833831a3094SHong Zhang     for (j=jmin; j<n; j++) {
83449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
83549b5e25fSSatish Balay       cval       = ib[j]*4;
83649b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
83749b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
83849b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
83949b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
84049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
84149b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
84249b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
84349b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
84449b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
84549b5e25fSSatish Balay       v  += 16;
84649b5e25fSSatish Balay     }
84749b5e25fSSatish Balay     xb +=4; ai++;
84849b5e25fSSatish Balay   }
84949b5e25fSSatish Balay 
8501ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
851bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
85249b5e25fSSatish Balay 
853dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
85449b5e25fSSatish Balay   PetscFunctionReturn(0);
85549b5e25fSSatish Balay }
85649b5e25fSSatish Balay 
8574a2ae208SSatish Balay #undef __FUNCT__
8584a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
859dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
86049b5e25fSSatish Balay {
86149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
862bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
86349b5e25fSSatish Balay   MatScalar      *v;
8646849ba73SBarry Smith   PetscErrorCode ierr;
86513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
86698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
86749b5e25fSSatish Balay 
86849b5e25fSSatish Balay   PetscFunctionBegin;
869343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8701ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
87249b5e25fSSatish Balay 
87349b5e25fSSatish Balay   v     = a->a;
87449b5e25fSSatish Balay   xb = x;
87549b5e25fSSatish Balay 
87649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
87749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
87849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
87949b5e25fSSatish Balay     ib = aj + *ai;
880831a3094SHong Zhang     jmin = 0;
88198c9bda7SSatish Balay     nonzerorow += (n>0);
8827fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
88349b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
88449b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
88549b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
88649b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
88749b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
888831a3094SHong Zhang       v += 25; jmin++;
8897fbae186SHong Zhang     }
890444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
891444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
892831a3094SHong Zhang     for (j=jmin; j<n; j++) {
89349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
89449b5e25fSSatish Balay       cval       = ib[j]*5;
89549b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
89649b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
89749b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
89849b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
89949b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
90049b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
90149b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
90249b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
90349b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
90449b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
90549b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
90649b5e25fSSatish Balay       v  += 25;
90749b5e25fSSatish Balay     }
90849b5e25fSSatish Balay     xb +=5; ai++;
90949b5e25fSSatish Balay   }
91049b5e25fSSatish Balay 
9111ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
912bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
91349b5e25fSSatish Balay 
914dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
91549b5e25fSSatish Balay   PetscFunctionReturn(0);
91649b5e25fSSatish Balay }
9174a2ae208SSatish Balay #undef __FUNCT__
9184a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
919dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
92049b5e25fSSatish Balay {
92149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
922bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
92349b5e25fSSatish Balay   MatScalar      *v;
9246849ba73SBarry Smith   PetscErrorCode ierr;
92513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
92698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
92749b5e25fSSatish Balay 
92849b5e25fSSatish Balay   PetscFunctionBegin;
929343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9301ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9311ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
93249b5e25fSSatish Balay 
93349b5e25fSSatish Balay   v     = a->a;
93449b5e25fSSatish Balay   xb = x;
93549b5e25fSSatish Balay 
93649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
93749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
93849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
93949b5e25fSSatish Balay     ib = aj + *ai;
940831a3094SHong Zhang     jmin = 0;
94198c9bda7SSatish Balay     nonzerorow += (n>0);
9427fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
94349b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
94449b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
94549b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
94649b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
94749b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
94849b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
949831a3094SHong Zhang       v += 36; jmin++;
9507fbae186SHong Zhang     }
951444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
952444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
953831a3094SHong Zhang     for (j=jmin; j<n; j++) {
95449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
95549b5e25fSSatish Balay       cval       = ib[j]*6;
95649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
95749b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
95849b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
95949b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
96049b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
96149b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
96249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96349b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
96449b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
96549b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
96649b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
96749b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
96849b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
96949b5e25fSSatish Balay       v  += 36;
97049b5e25fSSatish Balay     }
97149b5e25fSSatish Balay     xb +=6; ai++;
97249b5e25fSSatish Balay   }
97349b5e25fSSatish Balay 
9741ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
975bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
97649b5e25fSSatish Balay 
977dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
97849b5e25fSSatish Balay   PetscFunctionReturn(0);
97949b5e25fSSatish Balay }
98049b5e25fSSatish Balay 
9814a2ae208SSatish Balay #undef __FUNCT__
9824a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
983dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
98449b5e25fSSatish Balay {
98549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
986bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
98749b5e25fSSatish Balay   MatScalar      *v;
9886849ba73SBarry Smith   PetscErrorCode ierr;
98913f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
99098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
99149b5e25fSSatish Balay 
99249b5e25fSSatish Balay   PetscFunctionBegin;
993343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9941ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9951ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
99649b5e25fSSatish Balay 
99749b5e25fSSatish Balay   v     = a->a;
99849b5e25fSSatish Balay   xb = x;
99949b5e25fSSatish Balay 
100049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
100149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
100249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
100349b5e25fSSatish Balay     ib = aj + *ai;
1004831a3094SHong Zhang     jmin = 0;
100598c9bda7SSatish Balay     nonzerorow += (n>0);
10067fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
100749b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
100849b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
100949b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
101049b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
101149b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
101249b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
101349b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1014831a3094SHong Zhang       v += 49; jmin++;
10157fbae186SHong Zhang     }
1016444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1017444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1018831a3094SHong Zhang     for (j=jmin; j<n; j++) {
101949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
102049b5e25fSSatish Balay       cval       = ib[j]*7;
102149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
102249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
102349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
102449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
102549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
102649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
102749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
102849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
102949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
103049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
103149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
103249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
103349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
103449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
103549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
103649b5e25fSSatish Balay       v  += 49;
103749b5e25fSSatish Balay     }
103849b5e25fSSatish Balay     xb +=7; ai++;
103949b5e25fSSatish Balay   }
104049b5e25fSSatish Balay 
10411ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1042bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
104349b5e25fSSatish Balay 
1044dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
104549b5e25fSSatish Balay   PetscFunctionReturn(0);
104649b5e25fSSatish Balay }
104749b5e25fSSatish Balay 
10484a2ae208SSatish Balay #undef __FUNCT__
10494a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1050dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
105149b5e25fSSatish Balay {
105249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1053bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1054066653e3SSatish Balay   MatScalar      *v;
1055dfbe8321SBarry Smith   PetscErrorCode ierr;
1056d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
105798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
105849b5e25fSSatish Balay 
105949b5e25fSSatish Balay   PetscFunctionBegin;
1060343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
10611ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10621ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
106349b5e25fSSatish Balay 
106449b5e25fSSatish Balay   aj   = a->j;
106549b5e25fSSatish Balay   v    = a->a;
106649b5e25fSSatish Balay   ii   = a->i;
106749b5e25fSSatish Balay 
106849b5e25fSSatish Balay   if (!a->mult_work) {
1069d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
107049b5e25fSSatish Balay   }
107149b5e25fSSatish Balay   work = a->mult_work;
107249b5e25fSSatish Balay 
107349b5e25fSSatish Balay 
107449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
107549b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
107649b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
107798c9bda7SSatish Balay     nonzerorow += (n>0);
107849b5e25fSSatish Balay 
107949b5e25fSSatish Balay     /* upper triangular part */
108049b5e25fSSatish Balay     for (j=0; j<n; j++) {
108149b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
108249b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
108349b5e25fSSatish Balay       workt += bs;
108449b5e25fSSatish Balay     }
108549b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
108649b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
108749b5e25fSSatish Balay 
108849b5e25fSSatish Balay     /* strict lower triangular part */
1089831a3094SHong Zhang     idx = aj+ii[0];
1090831a3094SHong Zhang     if (*idx == i){
109196b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1092831a3094SHong Zhang     }
109349b5e25fSSatish Balay     if (ncols > 0){
109449b5e25fSSatish Balay       workt = work;
109587828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1096831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1097831a3094SHong Zhang       for (j=0; j<n; j++) {
1098831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
109949b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
110049b5e25fSSatish Balay         workt += bs;
110149b5e25fSSatish Balay       }
110249b5e25fSSatish Balay     }
110349b5e25fSSatish Balay 
110449b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
110549b5e25fSSatish Balay   }
110649b5e25fSSatish Balay 
11071ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1108bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
110949b5e25fSSatish Balay 
1110dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
111149b5e25fSSatish Balay   PetscFunctionReturn(0);
111249b5e25fSSatish Balay }
111349b5e25fSSatish Balay 
11144a2ae208SSatish Balay #undef __FUNCT__
11154a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1116f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
111749b5e25fSSatish Balay {
111849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1119f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1120efee365bSSatish Balay   PetscErrorCode ierr;
11210805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
112249b5e25fSSatish Balay 
112349b5e25fSSatish Balay   PetscFunctionBegin;
1124f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1125efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
112649b5e25fSSatish Balay   PetscFunctionReturn(0);
112749b5e25fSSatish Balay }
112849b5e25fSSatish Balay 
11294a2ae208SSatish Balay #undef __FUNCT__
11304a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1131dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
113249b5e25fSSatish Balay {
113349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
113449b5e25fSSatish Balay   MatScalar      *v = a->a;
113549b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1136d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11376849ba73SBarry Smith   PetscErrorCode ierr;
113813f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
113949b5e25fSSatish Balay 
114049b5e25fSSatish Balay   PetscFunctionBegin;
114149b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
114249b5e25fSSatish Balay     for (k=0; k<mbs; k++){
114349b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1144831a3094SHong Zhang       col  = aj + jmin;
1145831a3094SHong Zhang       if (*col == k){         /* diagonal block */
114649b5e25fSSatish Balay         for (i=0; i<bs2; i++){
114749b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
114849b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
114949b5e25fSSatish Balay #else
115049b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
115149b5e25fSSatish Balay #endif
115249b5e25fSSatish Balay         }
1153831a3094SHong Zhang         jmin++;
1154831a3094SHong Zhang       }
1155831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
115649b5e25fSSatish Balay         for (i=0; i<bs2; i++){
115749b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
115849b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
115949b5e25fSSatish Balay #else
116049b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
116149b5e25fSSatish Balay #endif
116249b5e25fSSatish Balay         }
116349b5e25fSSatish Balay       }
116449b5e25fSSatish Balay     }
116549b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11660b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
116774ed9c26SBarry Smith     ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
11680b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11690b8dc8d2SHong Zhang     il[0] = 0;
117049b5e25fSSatish Balay 
117149b5e25fSSatish Balay     *norm = 0.0;
117249b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
117349b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
117449b5e25fSSatish Balay       /*-- col sum --*/
117549b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1176831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1177831a3094SHong Zhang                   at step k */
117849b5e25fSSatish Balay       while (i<mbs){
117949b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
118049b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
118149b5e25fSSatish Balay         for (j=0; j<bs; j++){
118249b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
118349b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
118449b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
118549b5e25fSSatish Balay           }
118649b5e25fSSatish Balay         }
118749b5e25fSSatish Balay         /* update il, jl */
1188831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1189831a3094SHong Zhang         jmax = a->i[i+1];
119049b5e25fSSatish Balay         if (jmin < jmax){
119149b5e25fSSatish Balay           il[i] = jmin;
119249b5e25fSSatish Balay           j   = a->j[jmin];
119349b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
119449b5e25fSSatish Balay         }
119549b5e25fSSatish Balay         i = nexti;
119649b5e25fSSatish Balay       }
119749b5e25fSSatish Balay       /*-- row sum --*/
119849b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
119949b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
120049b5e25fSSatish Balay         for (j=0; j<bs; j++){
120149b5e25fSSatish Balay           v = a->a + i*bs2 + j;
120249b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
12030b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
120449b5e25fSSatish Balay           }
120549b5e25fSSatish Balay         }
120649b5e25fSSatish Balay       }
120749b5e25fSSatish Balay       /* add k_th block row to il, jl */
1208831a3094SHong Zhang       col = aj+jmin;
1209831a3094SHong Zhang       if (*col == k) jmin++;
121049b5e25fSSatish Balay       if (jmin < jmax){
121149b5e25fSSatish Balay         il[k] = jmin;
12120b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
121349b5e25fSSatish Balay       }
121449b5e25fSSatish Balay       for (j=0; j<bs; j++){
121549b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
121649b5e25fSSatish Balay       }
121749b5e25fSSatish Balay     }
121874ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
121949b5e25fSSatish Balay   } else {
1220e32f2f54SBarry Smith     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
122149b5e25fSSatish Balay   }
122249b5e25fSSatish Balay   PetscFunctionReturn(0);
122349b5e25fSSatish Balay }
122449b5e25fSSatish Balay 
12254a2ae208SSatish Balay #undef __FUNCT__
12264a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1227ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
122849b5e25fSSatish Balay {
122949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1230dfbe8321SBarry Smith   PetscErrorCode ierr;
123149b5e25fSSatish Balay 
123249b5e25fSSatish Balay   PetscFunctionBegin;
123349b5e25fSSatish Balay 
123449b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1235d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1236ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1237ef511fbeSHong Zhang     PetscFunctionReturn(0);
123849b5e25fSSatish Balay   }
123949b5e25fSSatish Balay 
124049b5e25fSSatish Balay   /* if the a->i are the same */
124113f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1242abc0a331SBarry Smith   if (!*flg) {
124349b5e25fSSatish Balay     PetscFunctionReturn(0);
124449b5e25fSSatish Balay   }
124549b5e25fSSatish Balay 
124649b5e25fSSatish Balay   /* if a->j are the same */
124713f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1248abc0a331SBarry Smith   if (!*flg) {
124949b5e25fSSatish Balay     PetscFunctionReturn(0);
125049b5e25fSSatish Balay   }
125149b5e25fSSatish Balay   /* if a->a are the same */
1252d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1253935af2e7SHong Zhang   PetscFunctionReturn(0);
125449b5e25fSSatish Balay }
125549b5e25fSSatish Balay 
12564a2ae208SSatish Balay #undef __FUNCT__
12574a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1258dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
125949b5e25fSSatish Balay {
126049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1261dfbe8321SBarry Smith   PetscErrorCode ierr;
126213f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
126387828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
126449b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
126549b5e25fSSatish Balay 
126649b5e25fSSatish Balay   PetscFunctionBegin;
1267d0f46423SBarry Smith   bs   = A->rmap->bs;
1268e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
126982799104SHong Zhang 
127049b5e25fSSatish Balay   aa   = a->a;
127149b5e25fSSatish Balay   ai   = a->i;
127249b5e25fSSatish Balay   aj   = a->j;
127349b5e25fSSatish Balay   ambs = a->mbs;
127449b5e25fSSatish Balay   bs2  = a->bs2;
127549b5e25fSSatish Balay 
12762dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12771ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
127849b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1279e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
128049b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
128149b5e25fSSatish Balay     j=ai[i];
128249b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
128349b5e25fSSatish Balay       row  = i*bs;
128449b5e25fSSatish Balay       aa_j = aa + j*bs2;
1285d5f3da31SBarry Smith       if (A->factortype && bs==1){
128682799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
128782799104SHong Zhang       } else {
128849b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
128949b5e25fSSatish Balay       }
129049b5e25fSSatish Balay     }
129182799104SHong Zhang   }
129282799104SHong Zhang 
12931ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
129449b5e25fSSatish Balay   PetscFunctionReturn(0);
129549b5e25fSSatish Balay }
129649b5e25fSSatish Balay 
12974a2ae208SSatish Balay #undef __FUNCT__
12984a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1299dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
130049b5e25fSSatish Balay {
130149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
13025e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
130349b5e25fSSatish Balay   MatScalar      *aa,*v;
1304dfbe8321SBarry Smith   PetscErrorCode ierr;
13055e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1306ace3abfcSBarry Smith   PetscBool      flg;
130749b5e25fSSatish Balay 
130849b5e25fSSatish Balay   PetscFunctionBegin;
1309b3bf805bSHong Zhang   if (ll != rr){
1310b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1311e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1312b3bf805bSHong Zhang   }
1313b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
131449b5e25fSSatish Balay   ai  = a->i;
131549b5e25fSSatish Balay   aj  = a->j;
131649b5e25fSSatish Balay   aa  = a->a;
1317d0f46423SBarry Smith   m   = A->rmap->N;
1318d0f46423SBarry Smith   bs  = A->rmap->bs;
131949b5e25fSSatish Balay   mbs = a->mbs;
132049b5e25fSSatish Balay   bs2 = a->bs2;
132149b5e25fSSatish Balay 
13221ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
132349b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1324e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
132549b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
132649b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
132749b5e25fSSatish Balay     li = l + i*bs;
132849b5e25fSSatish Balay     v  = aa + bs2*ai[i];
132949b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
133049b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13315e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
133249b5e25fSSatish Balay         x = ri[k];
133349b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
133449b5e25fSSatish Balay       }
133549b5e25fSSatish Balay     }
133649b5e25fSSatish Balay   }
13371ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1338dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
133949b5e25fSSatish Balay   PetscFunctionReturn(0);
134049b5e25fSSatish Balay }
134149b5e25fSSatish Balay 
13424a2ae208SSatish Balay #undef __FUNCT__
13434a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1344dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
134549b5e25fSSatish Balay {
134649b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
134749b5e25fSSatish Balay 
134849b5e25fSSatish Balay   PetscFunctionBegin;
134949b5e25fSSatish Balay   info->block_size     = a->bs2;
1350ceed8ce5SJed Brown   info->nz_allocated   = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */
13516c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
135249b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
135349b5e25fSSatish Balay   info->assemblies   = A->num_ass;
13548e58a170SBarry Smith   info->mallocs      = A->info.mallocs;
13557adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1356d5f3da31SBarry Smith   if (A->factortype) {
135749b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
135849b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
135949b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
136049b5e25fSSatish Balay   } else {
136149b5e25fSSatish Balay     info->fill_ratio_given  = 0;
136249b5e25fSSatish Balay     info->fill_ratio_needed = 0;
136349b5e25fSSatish Balay     info->factor_mallocs    = 0;
136449b5e25fSSatish Balay   }
136549b5e25fSSatish Balay   PetscFunctionReturn(0);
136649b5e25fSSatish Balay }
136749b5e25fSSatish Balay 
136849b5e25fSSatish Balay 
13694a2ae208SSatish Balay #undef __FUNCT__
13704a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1371dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
137249b5e25fSSatish Balay {
137349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1374dfbe8321SBarry Smith   PetscErrorCode ierr;
137549b5e25fSSatish Balay 
137649b5e25fSSatish Balay   PetscFunctionBegin;
137749b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
137849b5e25fSSatish Balay   PetscFunctionReturn(0);
137949b5e25fSSatish Balay }
1380dc354874SHong Zhang 
13814a2ae208SSatish Balay #undef __FUNCT__
1382985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1383985db425SBarry Smith /*
1384985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1385985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1386985db425SBarry Smith */
1387985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1388dc354874SHong Zhang {
1389dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1390dfbe8321SBarry Smith   PetscErrorCode ierr;
139113f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1392c3fca9a7SHong Zhang   PetscReal      atmp;
1393273d9f13SBarry Smith   MatScalar      *aa;
1394985db425SBarry Smith   PetscScalar    *x;
139513f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1396dc354874SHong Zhang 
1397dc354874SHong Zhang   PetscFunctionBegin;
1398e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1399e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1400d0f46423SBarry Smith   bs   = A->rmap->bs;
1401dc354874SHong Zhang   aa   = a->a;
1402dc354874SHong Zhang   ai   = a->i;
1403dc354874SHong Zhang   aj   = a->j;
140444117c81SHong Zhang   mbs = a->mbs;
1405dc354874SHong Zhang 
1406985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14071ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1408dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1409e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
141044117c81SHong Zhang   for (i=0; i<mbs; i++) {
1411d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1412d0f6400bSHong Zhang     brow  = bs*i;
141344117c81SHong Zhang     for (j=0; j<ncols; j++){
1414d0f6400bSHong Zhang       bcol = bs*(*aj);
141544117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1416d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
141744117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1418d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1419d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1420c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1421c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
142244117c81SHong Zhang         }
142344117c81SHong Zhang       }
1424d0f6400bSHong Zhang       aj++;
1425dc354874SHong Zhang     }
1426dc354874SHong Zhang   }
14271ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1428dc354874SHong Zhang   PetscFunctionReturn(0);
1429dc354874SHong Zhang }
1430