xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 51f70360820a74b069c99caf36cfc9c42e3d4837)
149b5e25fSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
3af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
4c6db04a5SJed Brown #include <petscbt.h>
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
6c6db04a5SJed Brown #include <petscblaslapack.h>
749b5e25fSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1149b5e25fSSatish Balay {
125eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
136849ba73SBarry Smith   PetscErrorCode ierr;
145d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
155d0c19d7SBarry Smith   const PetscInt *idx;
16db41cccfSHong Zhang   PetscBT        table_out,table_in;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19e32f2f54SBarry Smith   if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs  = a->mbs;
21d94109b8SHong Zhang   ai   = a->i;
22d94109b8SHong Zhang   aj   = a->j;
23d0f46423SBarry Smith   bs   = A->rmap->bs;
2453b8de81SBarry Smith   ierr = PetscBTCreate(mbs,&table_out);CHKERRQ(ierr);
25854ce69bSBarry Smith   ierr = PetscMalloc1(mbs+1,&nidx);CHKERRQ(ierr);
26854ce69bSBarry Smith   ierr = PetscMalloc1(A->rmap->N+1,&nidx2);CHKERRQ(ierr);
2753b8de81SBarry Smith   ierr = PetscBTCreate(mbs,&table_in);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31db41cccfSHong Zhang     ierr = PetscBTMemzero(mbs,table_out);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37db41cccfSHong Zhang     /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n; ++j) {
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42db41cccfSHong Zhang       if (!PetscBTLookupSet(table_out,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
486bf464f9SBarry Smith     ierr = ISDestroy(&is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++) { /* for each overlap */
52db41cccfSHong Zhang       /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53db41cccfSHong Zhang       ierr = PetscBTMemzero(mbs,table_in);CHKERRQ(ierr);
54db41cccfSHong Zhang       for (l=k; l<isz; l++) { ierr = PetscBTSet(table_in,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++) {
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59db41cccfSHong Zhang         if (PetscBTLookup(table_in,brow)) { /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end; l++) {
61d94109b8SHong Zhang             bcol = aj[l];
62d7b97159SHong Zhang             if (!PetscBTLookupSet(table_out,bcol)) {
63d7b97159SHong Zhang               nidx[isz++] = bcol;
64d7b97159SHong Zhang               if (bcol_max < bcol) bcol_max = bcol;
65d7b97159SHong Zhang             }
66d94109b8SHong Zhang           }
67d94109b8SHong Zhang           k++;
68d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
69d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
70d94109b8SHong Zhang           for (l = start; l<end; l++) {
71d94109b8SHong Zhang             bcol = aj[l];
72dbe03f88SHong Zhang             if (bcol > bcol_max) break;
73db41cccfSHong Zhang             if (PetscBTLookup(table_in,bcol)) {
7426fbe8dcSKarl Rupp               if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow;
75d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
76d94109b8SHong Zhang             }
77d94109b8SHong Zhang           }
78d94109b8SHong Zhang         }
79d94109b8SHong Zhang       }
80d94109b8SHong Zhang     } /* for each overlap */
81d94109b8SHong Zhang 
82d94109b8SHong Zhang     /* expand the Index Set */
83d94109b8SHong Zhang     for (j=0; j<isz; j++) {
8426fbe8dcSKarl Rupp       for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k;
85d94109b8SHong Zhang     }
8670b3c8c7SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
87d94109b8SHong Zhang   }
8894bacf5dSBarry Smith   ierr = PetscBTDestroy(&table_out);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
9194bacf5dSBarry Smith   ierr = PetscBTDestroy(&table_in);CHKERRQ(ierr);
925eee224dSHong Zhang   PetscFunctionReturn(0);
9349b5e25fSSatish Balay }
9449b5e25fSSatish Balay 
95847daeccSHong Zhang /* Bseq is non-symmetric SBAIJ matrix, only used internally by PETSc.
96847daeccSHong Zhang         Zero some ops' to avoid invalid usse */
974a2ae208SSatish Balay #undef __FUNCT__
98847daeccSHong Zhang #define __FUNCT__ "MatSeqSBAIJZeroOps_Private"
99847daeccSHong Zhang PetscErrorCode MatSeqSBAIJZeroOps_Private(Mat Bseq)
10049b5e25fSSatish Balay {
1016849ba73SBarry Smith   PetscErrorCode ierr;
10249b5e25fSSatish Balay 
10349b5e25fSSatish Balay   PetscFunctionBegin;
104847daeccSHong Zhang   ierr = MatSetOption(Bseq,MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
105847daeccSHong Zhang   Bseq->ops->mult                   = 0;
106847daeccSHong Zhang   Bseq->ops->multadd                = 0;
107847daeccSHong Zhang   Bseq->ops->multtranspose          = 0;
108847daeccSHong Zhang   Bseq->ops->multtransposeadd       = 0;
109847daeccSHong Zhang   Bseq->ops->lufactor               = 0;
110847daeccSHong Zhang   Bseq->ops->choleskyfactor         = 0;
111847daeccSHong Zhang   Bseq->ops->lufactorsymbolic       = 0;
112847daeccSHong Zhang   Bseq->ops->choleskyfactorsymbolic = 0;
113847daeccSHong Zhang   Bseq->ops->getinertia             = 0;
11449b5e25fSSatish Balay   PetscFunctionReturn(0);
11549b5e25fSSatish Balay }
11649b5e25fSSatish Balay 
1174a2ae208SSatish Balay #undef __FUNCT__
1184a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1194aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
12049b5e25fSSatish Balay {
1216849ba73SBarry Smith   PetscErrorCode ierr;
12249b5e25fSSatish Balay 
12349b5e25fSSatish Balay   PetscFunctionBegin;
124847daeccSHong Zhang   ierr = MatGetSubMatrix_SeqBAIJ(A,isrow,iscol,scall,B);CHKERRQ(ierr);
125847daeccSHong Zhang 
1268f46ffcaSHong Zhang   if (isrow != iscol) {
1278f46ffcaSHong Zhang     PetscBool isequal;
1288f46ffcaSHong Zhang     ierr = ISEqual(isrow,iscol,&isequal);CHKERRQ(ierr);
129847daeccSHong Zhang     if (!isequal) {
130847daeccSHong Zhang       ierr = MatSeqSBAIJZeroOps_Private(*B);CHKERRQ(ierr);
1318f46ffcaSHong Zhang     }
13249b5e25fSSatish Balay   }
13349b5e25fSSatish Balay   PetscFunctionReturn(0);
13449b5e25fSSatish Balay }
13549b5e25fSSatish Balay 
1364a2ae208SSatish Balay #undef __FUNCT__
1374a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
13813f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
13949b5e25fSSatish Balay {
1406849ba73SBarry Smith   PetscErrorCode ierr;
14113f74950SBarry Smith   PetscInt       i;
142afebec48SHong Zhang   PetscBool      flg;
14349b5e25fSSatish Balay 
14449b5e25fSSatish Balay   PetscFunctionBegin;
145afebec48SHong Zhang   ierr = MatGetSubMatrices_SeqBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
14649b5e25fSSatish Balay   for (i=0; i<n; i++) {
147afebec48SHong Zhang     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
148afebec48SHong Zhang     if (!flg) {
149847daeccSHong Zhang       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
150afebec48SHong Zhang     }
15149b5e25fSSatish Balay   }
15249b5e25fSSatish Balay   PetscFunctionReturn(0);
15349b5e25fSSatish Balay }
15449b5e25fSSatish Balay 
15549b5e25fSSatish Balay /* -------------------------------------------------------*/
15649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
15749b5e25fSSatish Balay /* -------------------------------------------------------*/
15849b5e25fSSatish Balay 
1594a2ae208SSatish Balay #undef __FUNCT__
1604a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
161dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
16249b5e25fSSatish Balay {
16349b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
164d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,zero=0.0;
165d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
166d9ca1df4SBarry Smith   const MatScalar   *v;
1676849ba73SBarry Smith   PetscErrorCode    ierr;
168d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
169d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
17098c9bda7SSatish Balay   PetscInt          nonzerorow=0;
17149b5e25fSSatish Balay 
17249b5e25fSSatish Balay   PetscFunctionBegin;
1732dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
174d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1751ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
17649b5e25fSSatish Balay 
17749b5e25fSSatish Balay   v  = a->a;
17849b5e25fSSatish Balay   xb = x;
17949b5e25fSSatish Balay 
18049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
18149b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
18249b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1];
18349b5e25fSSatish Balay     ib          = aj + *ai;
184831a3094SHong Zhang     jmin        = 0;
18598c9bda7SSatish Balay     nonzerorow += (n>0);
1867fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
18749b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
18849b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
189831a3094SHong Zhang       v        += 4; jmin++;
1907fbae186SHong Zhang     }
191444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
192444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
193831a3094SHong Zhang     for (j=jmin; j<n; j++) {
19449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
19549b5e25fSSatish Balay       cval       = ib[j]*2;
19649b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2;
19749b5e25fSSatish Balay       z[cval+1] += v[2]*x1 + v[3]*x2;
19849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
19949b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
20049b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
20149b5e25fSSatish Balay       v        += 4;
20249b5e25fSSatish Balay     }
20349b5e25fSSatish Balay     xb +=2; ai++;
20449b5e25fSSatish Balay   }
20549b5e25fSSatish Balay 
206d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2071ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
208dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
20949b5e25fSSatish Balay   PetscFunctionReturn(0);
21049b5e25fSSatish Balay }
21149b5e25fSSatish Balay 
2124a2ae208SSatish Balay #undef __FUNCT__
2134a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
214dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
21549b5e25fSSatish Balay {
21649b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
217d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,zero=0.0;
218d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
219d9ca1df4SBarry Smith   const MatScalar   *v;
2206849ba73SBarry Smith   PetscErrorCode    ierr;
221d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
222d9ca1df4SBarry Smith   const PetscInt    *aj = a->j,*ai = a->i,*ib;
22398c9bda7SSatish Balay   PetscInt          nonzerorow=0;
22449b5e25fSSatish Balay 
22549b5e25fSSatish Balay   PetscFunctionBegin;
2262dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
227d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2281ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
22949b5e25fSSatish Balay 
23049b5e25fSSatish Balay   v  = a->a;
23149b5e25fSSatish Balay   xb = x;
23249b5e25fSSatish Balay 
23349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
23449b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
23549b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
23649b5e25fSSatish Balay     ib          = aj + *ai;
237831a3094SHong Zhang     jmin        = 0;
23898c9bda7SSatish Balay     nonzerorow += (n>0);
2397fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
24049b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
24149b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
24249b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
243831a3094SHong Zhang       v        += 9; jmin++;
2447fbae186SHong Zhang     }
245444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
246444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
247831a3094SHong Zhang     for (j=jmin; j<n; j++) {
24849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
24949b5e25fSSatish Balay       cval       = ib[j]*3;
25049b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
25149b5e25fSSatish Balay       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
25249b5e25fSSatish Balay       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
25349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
25449b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
25549b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
25649b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
25749b5e25fSSatish Balay       v        += 9;
25849b5e25fSSatish Balay     }
25949b5e25fSSatish Balay     xb +=3; ai++;
26049b5e25fSSatish Balay   }
26149b5e25fSSatish Balay 
262d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2631ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
264dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
26549b5e25fSSatish Balay   PetscFunctionReturn(0);
26649b5e25fSSatish Balay }
26749b5e25fSSatish Balay 
2684a2ae208SSatish Balay #undef __FUNCT__
2694a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
270dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
27149b5e25fSSatish Balay {
27249b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
273d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,zero=0.0;
274d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
275d9ca1df4SBarry Smith   const MatScalar   *v;
2766849ba73SBarry Smith   PetscErrorCode    ierr;
277d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
278d9ca1df4SBarry Smith   const PetscInt    *aj = a->j,*ai = a->i,*ib;
27998c9bda7SSatish Balay   PetscInt          nonzerorow = 0;
28049b5e25fSSatish Balay 
28149b5e25fSSatish Balay   PetscFunctionBegin;
2822dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
283d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2841ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
28549b5e25fSSatish Balay 
28649b5e25fSSatish Balay   v  = a->a;
28749b5e25fSSatish Balay   xb = x;
28849b5e25fSSatish Balay 
28949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
29049b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
29149b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
29249b5e25fSSatish Balay     ib          = aj + *ai;
293831a3094SHong Zhang     jmin        = 0;
29498c9bda7SSatish Balay     nonzerorow += (n>0);
2957fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
29649b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
29749b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
29849b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
29949b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
300831a3094SHong Zhang       v        += 16; jmin++;
3017fbae186SHong Zhang     }
302444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
303444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
304831a3094SHong Zhang     for (j=jmin; j<n; j++) {
30549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
30649b5e25fSSatish Balay       cval       = ib[j]*4;
30749b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
30849b5e25fSSatish Balay       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
30949b5e25fSSatish Balay       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
31049b5e25fSSatish Balay       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
31149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
31249b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
31349b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
31449b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
31549b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
31649b5e25fSSatish Balay       v        += 16;
31749b5e25fSSatish Balay     }
31849b5e25fSSatish Balay     xb +=4; ai++;
31949b5e25fSSatish Balay   }
32049b5e25fSSatish Balay 
321d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3221ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
323dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
32449b5e25fSSatish Balay   PetscFunctionReturn(0);
32549b5e25fSSatish Balay }
32649b5e25fSSatish Balay 
3274a2ae208SSatish Balay #undef __FUNCT__
3284a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
329dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
33049b5e25fSSatish Balay {
33149b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
332d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,zero=0.0;
333d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
334d9ca1df4SBarry Smith   const MatScalar   *v;
3356849ba73SBarry Smith   PetscErrorCode    ierr;
336d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
337d9ca1df4SBarry Smith   const PetscInt    *aj = a->j,*ai = a->i,*ib;
33898c9bda7SSatish Balay   PetscInt          nonzerorow=0;
33949b5e25fSSatish Balay 
34049b5e25fSSatish Balay   PetscFunctionBegin;
3412dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
342d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34449b5e25fSSatish Balay 
34549b5e25fSSatish Balay   v  = a->a;
34649b5e25fSSatish Balay   xb = x;
34749b5e25fSSatish Balay 
34849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
34949b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
35049b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
35149b5e25fSSatish Balay     ib          = aj + *ai;
352831a3094SHong Zhang     jmin        = 0;
35398c9bda7SSatish Balay     nonzerorow += (n>0);
3547fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
35549b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
35649b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
35749b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
35849b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
35949b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
360831a3094SHong Zhang       v        += 25; jmin++;
3617fbae186SHong Zhang     }
362444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
363444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
364831a3094SHong Zhang     for (j=jmin; j<n; j++) {
36549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
36649b5e25fSSatish Balay       cval       = ib[j]*5;
36749b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
36849b5e25fSSatish Balay       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
36949b5e25fSSatish Balay       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
37049b5e25fSSatish Balay       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
37149b5e25fSSatish Balay       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
37249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
37349b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
37449b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
37549b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
37649b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
37749b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
37849b5e25fSSatish Balay       v        += 25;
37949b5e25fSSatish Balay     }
38049b5e25fSSatish Balay     xb +=5; ai++;
38149b5e25fSSatish Balay   }
38249b5e25fSSatish Balay 
383d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3841ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
385dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
38649b5e25fSSatish Balay   PetscFunctionReturn(0);
38749b5e25fSSatish Balay }
38849b5e25fSSatish Balay 
38949b5e25fSSatish Balay 
3904a2ae208SSatish Balay #undef __FUNCT__
3914a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
392dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
39349b5e25fSSatish Balay {
39449b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
395d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6,zero=0.0;
396d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
397d9ca1df4SBarry Smith   const MatScalar   *v;
3986849ba73SBarry Smith   PetscErrorCode    ierr;
399d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
400d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
40198c9bda7SSatish Balay   PetscInt          nonzerorow=0;
40249b5e25fSSatish Balay 
40349b5e25fSSatish Balay   PetscFunctionBegin;
4042dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
405d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4061ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
40749b5e25fSSatish Balay 
40849b5e25fSSatish Balay   v  = a->a;
40949b5e25fSSatish Balay   xb = x;
41049b5e25fSSatish Balay 
41149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41249b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
41349b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
41449b5e25fSSatish Balay     ib          = aj + *ai;
415831a3094SHong Zhang     jmin        = 0;
41698c9bda7SSatish Balay     nonzerorow += (n>0);
4177fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
41849b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
41949b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
42049b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
42149b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
42249b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
42349b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
424831a3094SHong Zhang       v        += 36; jmin++;
4257fbae186SHong Zhang     }
426444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
427444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
428831a3094SHong Zhang     for (j=jmin; j<n; j++) {
42949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
43049b5e25fSSatish Balay       cval       = ib[j]*6;
43149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
43249b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
43349b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
43449b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
43549b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
43649b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
43749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
43849b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
43949b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
44049b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
44149b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
44249b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
44349b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
44449b5e25fSSatish Balay       v        += 36;
44549b5e25fSSatish Balay     }
44649b5e25fSSatish Balay     xb +=6; ai++;
44749b5e25fSSatish Balay   }
44849b5e25fSSatish Balay 
449d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4501ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
451dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
45249b5e25fSSatish Balay   PetscFunctionReturn(0);
45349b5e25fSSatish Balay }
4544a2ae208SSatish Balay #undef __FUNCT__
4554a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
456dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
45749b5e25fSSatish Balay {
45849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
459d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
460d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
461d9ca1df4SBarry Smith   const MatScalar   *v;
4626849ba73SBarry Smith   PetscErrorCode    ierr;
463d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
464d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
46598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
46649b5e25fSSatish Balay 
46749b5e25fSSatish Balay   PetscFunctionBegin;
4682dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
469d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4701ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
47149b5e25fSSatish Balay 
47249b5e25fSSatish Balay   v  = a->a;
47349b5e25fSSatish Balay   xb = x;
47449b5e25fSSatish Balay 
47549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
47749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
47849b5e25fSSatish Balay     ib          = aj + *ai;
479831a3094SHong Zhang     jmin        = 0;
48098c9bda7SSatish Balay     nonzerorow += (n>0);
4817fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
48249b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
48349b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
48449b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
48549b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
48649b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
48749b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
48849b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
489831a3094SHong Zhang       v        += 49; jmin++;
4907fbae186SHong Zhang     }
491444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
492444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
493831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49549b5e25fSSatish Balay       cval       = ib[j]*7;
49649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
49749b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
49849b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
49949b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
50049b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
50149b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
50249b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
50349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50449b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
50549b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
50649b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
50749b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
50849b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
50949b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
51049b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
51149b5e25fSSatish Balay       v       += 49;
51249b5e25fSSatish Balay     }
51349b5e25fSSatish Balay     xb +=7; ai++;
51449b5e25fSSatish Balay   }
515d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5161ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
517dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
51849b5e25fSSatish Balay   PetscFunctionReturn(0);
51949b5e25fSSatish Balay }
52049b5e25fSSatish Balay 
52149b5e25fSSatish Balay /*
52249b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
52349b5e25fSSatish Balay */
5244a2ae208SSatish Balay #undef __FUNCT__
5254a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
526dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
52749b5e25fSSatish Balay {
52849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
529d9ca1df4SBarry Smith   PetscScalar       *z,*z_ptr,*zb,*work,*workt,zero=0.0;
530d9ca1df4SBarry Smith   const PetscScalar *x,*x_ptr,*xb;
531d9ca1df4SBarry Smith   const MatScalar   *v;
532dfbe8321SBarry Smith   PetscErrorCode    ierr;
533d9ca1df4SBarry Smith   PetscInt          mbs =a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
534d9ca1df4SBarry Smith   const PetscInt    *idx,*aj,*ii;
53598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
53649b5e25fSSatish Balay 
53749b5e25fSSatish Balay   PetscFunctionBegin;
5382dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
539d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);x_ptr = x;
5401ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
54149b5e25fSSatish Balay 
54249b5e25fSSatish Balay   aj = a->j;
54349b5e25fSSatish Balay   v  = a->a;
54449b5e25fSSatish Balay   ii = a->i;
54549b5e25fSSatish Balay 
54649b5e25fSSatish Balay   if (!a->mult_work) {
547854ce69bSBarry Smith     ierr = PetscMalloc1(A->rmap->N+1,&a->mult_work);CHKERRQ(ierr);
54849b5e25fSSatish Balay   }
54949b5e25fSSatish Balay   work = a->mult_work;
55049b5e25fSSatish Balay 
55149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
55249b5e25fSSatish Balay     n           = ii[1] - ii[0]; ncols = n*bs;
55349b5e25fSSatish Balay     workt       = work; idx=aj+ii[0];
55498c9bda7SSatish Balay     nonzerorow += (n>0);
55549b5e25fSSatish Balay 
55649b5e25fSSatish Balay     /* upper triangular part */
55749b5e25fSSatish Balay     for (j=0; j<n; j++) {
55849b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
55949b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
56049b5e25fSSatish Balay       workt += bs;
56149b5e25fSSatish Balay     }
56249b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
56396b95a6bSBarry Smith     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
56449b5e25fSSatish Balay 
56549b5e25fSSatish Balay     /* strict lower triangular part */
566831a3094SHong Zhang     idx = aj+ii[0];
567831a3094SHong Zhang     if (*idx == i) {
56896b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
569831a3094SHong Zhang     }
57096b9376eSHong Zhang 
57149b5e25fSSatish Balay     if (ncols > 0) {
57249b5e25fSSatish Balay       workt = work;
57387828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
57496b95a6bSBarry Smith       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
575831a3094SHong Zhang       for (j=0; j<n; j++) {
576831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
57749b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k];
57849b5e25fSSatish Balay         workt += bs;
57949b5e25fSSatish Balay       }
58049b5e25fSSatish Balay     }
58149b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
58249b5e25fSSatish Balay   }
58349b5e25fSSatish Balay 
584d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5851ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
586dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
58749b5e25fSSatish Balay   PetscFunctionReturn(0);
58849b5e25fSSatish Balay }
58949b5e25fSSatish Balay 
5904a2ae208SSatish Balay #undef __FUNCT__
5914a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
592dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
59349b5e25fSSatish Balay {
59449b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
595d9ca1df4SBarry Smith   PetscScalar       *z,x1;
596d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
597d9ca1df4SBarry Smith   const MatScalar   *v;
5986849ba73SBarry Smith   PetscErrorCode    ierr;
599d9ca1df4SBarry Smith   PetscInt          mbs =a->mbs,i,n,cval,j,jmin;
600d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
60198c9bda7SSatish Balay   PetscInt          nonzerorow=0;
60249b5e25fSSatish Balay 
60349b5e25fSSatish Balay   PetscFunctionBegin;
604343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
605d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6061ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
60749b5e25fSSatish Balay   v    = a->a;
60849b5e25fSSatish Balay   xb   = x;
60949b5e25fSSatish Balay 
61049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61149b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th row of A */
61249b5e25fSSatish Balay     x1          = xb[0];
61349b5e25fSSatish Balay     ib          = aj + *ai;
614831a3094SHong Zhang     jmin        = 0;
61598c9bda7SSatish Balay     nonzerorow += (n>0);
616831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
617831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
618831a3094SHong Zhang     }
619831a3094SHong Zhang     for (j=jmin; j<n; j++) {
62049b5e25fSSatish Balay       cval    = *ib;
62149b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
62249b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
62349b5e25fSSatish Balay     }
62449b5e25fSSatish Balay     xb++; ai++;
62549b5e25fSSatish Balay   }
62649b5e25fSSatish Balay 
627d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
628bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
62949b5e25fSSatish Balay 
630dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
63149b5e25fSSatish Balay   PetscFunctionReturn(0);
63249b5e25fSSatish Balay }
63349b5e25fSSatish Balay 
6344a2ae208SSatish Balay #undef __FUNCT__
6354a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
636dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
63749b5e25fSSatish Balay {
63849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
639d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2;
640d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
641d9ca1df4SBarry Smith   const MatScalar   *v;
6426849ba73SBarry Smith   PetscErrorCode    ierr;
643d9ca1df4SBarry Smith   PetscInt          mbs =a->mbs,i,n,cval,j,jmin;
644d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
64598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
64649b5e25fSSatish Balay 
64749b5e25fSSatish Balay   PetscFunctionBegin;
648343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
649d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
65149b5e25fSSatish Balay 
65249b5e25fSSatish Balay   v  = a->a;
65349b5e25fSSatish Balay   xb = x;
65449b5e25fSSatish Balay 
65549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
65649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
65749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1];
65849b5e25fSSatish Balay     ib          = aj + *ai;
659831a3094SHong Zhang     jmin        = 0;
66098c9bda7SSatish Balay     nonzerorow += (n>0);
6617fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
66249b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
66349b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
664831a3094SHong Zhang       v        += 4; jmin++;
6657fbae186SHong Zhang     }
666444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
667444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
668831a3094SHong Zhang     for (j=jmin; j<n; j++) {
66949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
67049b5e25fSSatish Balay       cval       = ib[j]*2;
67149b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2;
67249b5e25fSSatish Balay       z[cval+1] += v[2]*x1 + v[3]*x2;
67349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
67449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
67549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
67649b5e25fSSatish Balay       v        += 4;
67749b5e25fSSatish Balay     }
67849b5e25fSSatish Balay     xb +=2; ai++;
67949b5e25fSSatish Balay   }
680d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
681bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68249b5e25fSSatish Balay 
683dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
68449b5e25fSSatish Balay   PetscFunctionReturn(0);
68549b5e25fSSatish Balay }
68649b5e25fSSatish Balay 
6874a2ae208SSatish Balay #undef __FUNCT__
6884a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
689dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
69049b5e25fSSatish Balay {
69149b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
692d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3;
693d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
694d9ca1df4SBarry Smith   const MatScalar   *v;
6956849ba73SBarry Smith   PetscErrorCode    ierr;
696d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
697d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
69898c9bda7SSatish Balay   PetscInt          nonzerorow=0;
69949b5e25fSSatish Balay 
70049b5e25fSSatish Balay   PetscFunctionBegin;
701343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
702d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7031ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70449b5e25fSSatish Balay 
70549b5e25fSSatish Balay   v  = a->a;
70649b5e25fSSatish Balay   xb = x;
70749b5e25fSSatish Balay 
70849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70949b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
71049b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
71149b5e25fSSatish Balay     ib          = aj + *ai;
712831a3094SHong Zhang     jmin        = 0;
71398c9bda7SSatish Balay     nonzerorow += (n>0);
7147fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
71549b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
71649b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
71749b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
718831a3094SHong Zhang       v        += 9; jmin++;
7197fbae186SHong Zhang     }
720444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
721444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
722831a3094SHong Zhang     for (j=jmin; j<n; j++) {
72349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
72449b5e25fSSatish Balay       cval       = ib[j]*3;
72549b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
72649b5e25fSSatish Balay       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
72749b5e25fSSatish Balay       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
72849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
72949b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
73049b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
73149b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
73249b5e25fSSatish Balay       v        += 9;
73349b5e25fSSatish Balay     }
73449b5e25fSSatish Balay     xb +=3; ai++;
73549b5e25fSSatish Balay   }
73649b5e25fSSatish Balay 
737d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
738bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
73949b5e25fSSatish Balay 
740dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
74149b5e25fSSatish Balay   PetscFunctionReturn(0);
74249b5e25fSSatish Balay }
74349b5e25fSSatish Balay 
7444a2ae208SSatish Balay #undef __FUNCT__
7454a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
746dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
74749b5e25fSSatish Balay {
74849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
749d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4;
750d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
751d9ca1df4SBarry Smith   const MatScalar   *v;
7526849ba73SBarry Smith   PetscErrorCode    ierr;
753d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
754d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
75598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
75649b5e25fSSatish Balay 
75749b5e25fSSatish Balay   PetscFunctionBegin;
758343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
759d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7601ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
76149b5e25fSSatish Balay 
76249b5e25fSSatish Balay   v  = a->a;
76349b5e25fSSatish Balay   xb = x;
76449b5e25fSSatish Balay 
76549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
76649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
76749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
76849b5e25fSSatish Balay     ib          = aj + *ai;
769831a3094SHong Zhang     jmin        = 0;
77098c9bda7SSatish Balay     nonzerorow += (n>0);
7717fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
77249b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
77349b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
77449b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
77549b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
776831a3094SHong Zhang       v        += 16; jmin++;
7777fbae186SHong Zhang     }
778444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
779444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
780831a3094SHong Zhang     for (j=jmin; j<n; j++) {
78149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
78249b5e25fSSatish Balay       cval       = ib[j]*4;
78349b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
78449b5e25fSSatish Balay       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
78549b5e25fSSatish Balay       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
78649b5e25fSSatish Balay       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
78749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
78849b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
78949b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
79049b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
79149b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
79249b5e25fSSatish Balay       v        += 16;
79349b5e25fSSatish Balay     }
79449b5e25fSSatish Balay     xb +=4; ai++;
79549b5e25fSSatish Balay   }
79649b5e25fSSatish Balay 
797d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
798bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
79949b5e25fSSatish Balay 
800dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
80149b5e25fSSatish Balay   PetscFunctionReturn(0);
80249b5e25fSSatish Balay }
80349b5e25fSSatish Balay 
8044a2ae208SSatish Balay #undef __FUNCT__
8054a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
806dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
80749b5e25fSSatish Balay {
80849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
809d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5;
810d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
811d9ca1df4SBarry Smith   const MatScalar   *v;
8126849ba73SBarry Smith   PetscErrorCode    ierr;
813d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
814d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
81598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
81649b5e25fSSatish Balay 
81749b5e25fSSatish Balay   PetscFunctionBegin;
818343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
819d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8201ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
82149b5e25fSSatish Balay 
82249b5e25fSSatish Balay   v  = a->a;
82349b5e25fSSatish Balay   xb = x;
82449b5e25fSSatish Balay 
82549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
82649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
82749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
82849b5e25fSSatish Balay     ib          = aj + *ai;
829831a3094SHong Zhang     jmin        = 0;
83098c9bda7SSatish Balay     nonzerorow += (n>0);
8317fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
83249b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
83349b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
83449b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
83549b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
83649b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
837831a3094SHong Zhang       v        += 25; jmin++;
8387fbae186SHong Zhang     }
839444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
840444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
841831a3094SHong Zhang     for (j=jmin; j<n; j++) {
84249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
84349b5e25fSSatish Balay       cval       = ib[j]*5;
84449b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
84549b5e25fSSatish Balay       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
84649b5e25fSSatish Balay       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
84749b5e25fSSatish Balay       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
84849b5e25fSSatish Balay       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
84949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
85049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
85149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
85249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
85349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
85449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
85549b5e25fSSatish Balay       v        += 25;
85649b5e25fSSatish Balay     }
85749b5e25fSSatish Balay     xb +=5; ai++;
85849b5e25fSSatish Balay   }
85949b5e25fSSatish Balay 
860d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
861bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
86249b5e25fSSatish Balay 
863dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
86449b5e25fSSatish Balay   PetscFunctionReturn(0);
86549b5e25fSSatish Balay }
8664a2ae208SSatish Balay #undef __FUNCT__
8674a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
868dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
86949b5e25fSSatish Balay {
87049b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
871d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6;
872d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
873d9ca1df4SBarry Smith   const MatScalar   *v;
8746849ba73SBarry Smith   PetscErrorCode    ierr;
875d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
876d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
87798c9bda7SSatish Balay   PetscInt          nonzerorow=0;
87849b5e25fSSatish Balay 
87949b5e25fSSatish Balay   PetscFunctionBegin;
880343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
881d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8821ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
88349b5e25fSSatish Balay 
88449b5e25fSSatish Balay   v  = a->a;
88549b5e25fSSatish Balay   xb = x;
88649b5e25fSSatish Balay 
88749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
88849b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
88949b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
89049b5e25fSSatish Balay     ib          = aj + *ai;
891831a3094SHong Zhang     jmin        = 0;
89298c9bda7SSatish Balay     nonzerorow += (n>0);
8937fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
89449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
89549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
89649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
89749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
89849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
89949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
900831a3094SHong Zhang       v        += 36; jmin++;
9017fbae186SHong Zhang     }
902444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
903444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
904831a3094SHong Zhang     for (j=jmin; j<n; j++) {
90549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
90649b5e25fSSatish Balay       cval       = ib[j]*6;
90749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
90849b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
90949b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
91049b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
91149b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
91249b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
91349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
91449b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
91549b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
91649b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
91749b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
91849b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
91949b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
92049b5e25fSSatish Balay       v        += 36;
92149b5e25fSSatish Balay     }
92249b5e25fSSatish Balay     xb +=6; ai++;
92349b5e25fSSatish Balay   }
92449b5e25fSSatish Balay 
925d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
926bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
92749b5e25fSSatish Balay 
928dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
92949b5e25fSSatish Balay   PetscFunctionReturn(0);
93049b5e25fSSatish Balay }
93149b5e25fSSatish Balay 
9324a2ae208SSatish Balay #undef __FUNCT__
9334a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
934dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
93549b5e25fSSatish Balay {
93649b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
937d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6,x7;
938d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
939d9ca1df4SBarry Smith   const MatScalar   *v;
9406849ba73SBarry Smith   PetscErrorCode    ierr;
941d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
942d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
94398c9bda7SSatish Balay   PetscInt          nonzerorow=0;
94449b5e25fSSatish Balay 
94549b5e25fSSatish Balay   PetscFunctionBegin;
946343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
947d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
94949b5e25fSSatish Balay 
95049b5e25fSSatish Balay   v  = a->a;
95149b5e25fSSatish Balay   xb = x;
95249b5e25fSSatish Balay 
95349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
95449b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
95549b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
95649b5e25fSSatish Balay     ib          = aj + *ai;
957831a3094SHong Zhang     jmin        = 0;
95898c9bda7SSatish Balay     nonzerorow += (n>0);
9597fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
96049b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
96149b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
96249b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
96349b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
96449b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
96549b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
96649b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
967831a3094SHong Zhang       v        += 49; jmin++;
9687fbae186SHong Zhang     }
969444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
970444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
971831a3094SHong Zhang     for (j=jmin; j<n; j++) {
97249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
97349b5e25fSSatish Balay       cval       = ib[j]*7;
97449b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
97549b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
97649b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
97749b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
97849b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
97949b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
98049b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
98149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
98249b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
98349b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
98449b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
98549b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
98649b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
98749b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
98849b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
98949b5e25fSSatish Balay       v       += 49;
99049b5e25fSSatish Balay     }
99149b5e25fSSatish Balay     xb +=7; ai++;
99249b5e25fSSatish Balay   }
99349b5e25fSSatish Balay 
994d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
995bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
99649b5e25fSSatish Balay 
997dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
99849b5e25fSSatish Balay   PetscFunctionReturn(0);
99949b5e25fSSatish Balay }
100049b5e25fSSatish Balay 
10014a2ae208SSatish Balay #undef __FUNCT__
10024a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1003dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
100449b5e25fSSatish Balay {
100549b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
1006d9ca1df4SBarry Smith   PetscScalar       *z,*z_ptr=0,*zb,*work,*workt;
1007d9ca1df4SBarry Smith   const PetscScalar *x,*x_ptr,*xb;
1008d9ca1df4SBarry Smith   const MatScalar   *v;
1009dfbe8321SBarry Smith   PetscErrorCode    ierr;
1010d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1011d9ca1df4SBarry Smith   const PetscInt    *idx,*aj,*ii;
101298c9bda7SSatish Balay   PetscInt          nonzerorow=0;
101349b5e25fSSatish Balay 
101449b5e25fSSatish Balay   PetscFunctionBegin;
1015343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1016d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); x_ptr=x;
10171ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
101849b5e25fSSatish Balay 
101949b5e25fSSatish Balay   aj = a->j;
102049b5e25fSSatish Balay   v  = a->a;
102149b5e25fSSatish Balay   ii = a->i;
102249b5e25fSSatish Balay 
102349b5e25fSSatish Balay   if (!a->mult_work) {
1024854ce69bSBarry Smith     ierr = PetscMalloc1(A->rmap->n+1,&a->mult_work);CHKERRQ(ierr);
102549b5e25fSSatish Balay   }
102649b5e25fSSatish Balay   work = a->mult_work;
102749b5e25fSSatish Balay 
102849b5e25fSSatish Balay 
102949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
103049b5e25fSSatish Balay     n           = ii[1] - ii[0]; ncols = n*bs;
103149b5e25fSSatish Balay     workt       = work; idx=aj+ii[0];
103298c9bda7SSatish Balay     nonzerorow += (n>0);
103349b5e25fSSatish Balay 
103449b5e25fSSatish Balay     /* upper triangular part */
103549b5e25fSSatish Balay     for (j=0; j<n; j++) {
103649b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
103749b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
103849b5e25fSSatish Balay       workt += bs;
103949b5e25fSSatish Balay     }
104049b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
104196b95a6bSBarry Smith     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
104249b5e25fSSatish Balay 
104349b5e25fSSatish Balay     /* strict lower triangular part */
1044831a3094SHong Zhang     idx = aj+ii[0];
1045831a3094SHong Zhang     if (*idx == i) {
104696b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1047831a3094SHong Zhang     }
104849b5e25fSSatish Balay     if (ncols > 0) {
104949b5e25fSSatish Balay       workt = work;
105087828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
105196b95a6bSBarry Smith       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1052831a3094SHong Zhang       for (j=0; j<n; j++) {
1053831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
105449b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k];
105549b5e25fSSatish Balay         workt += bs;
105649b5e25fSSatish Balay       }
105749b5e25fSSatish Balay     }
105849b5e25fSSatish Balay 
105949b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
106049b5e25fSSatish Balay   }
106149b5e25fSSatish Balay 
1062d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1063bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
106449b5e25fSSatish Balay 
1065dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
106649b5e25fSSatish Balay   PetscFunctionReturn(0);
106749b5e25fSSatish Balay }
106849b5e25fSSatish Balay 
10694a2ae208SSatish Balay #undef __FUNCT__
10704a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1071f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
107249b5e25fSSatish Balay {
107349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a     = (Mat_SeqSBAIJ*)inA->data;
1074f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1075efee365bSSatish Balay   PetscErrorCode ierr;
1076c5df96a5SBarry Smith   PetscBLASInt   one = 1,totalnz;
107749b5e25fSSatish Balay 
107849b5e25fSSatish Balay   PetscFunctionBegin;
1079c5df96a5SBarry Smith   ierr = PetscBLASIntCast(a->bs2*a->nz,&totalnz);CHKERRQ(ierr);
10808b83055fSJed Brown   PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one));
1081efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
108249b5e25fSSatish Balay   PetscFunctionReturn(0);
108349b5e25fSSatish Balay }
108449b5e25fSSatish Balay 
10854a2ae208SSatish Balay #undef __FUNCT__
10864a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1087dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
108849b5e25fSSatish Balay {
108949b5e25fSSatish Balay   Mat_SeqSBAIJ    *a       = (Mat_SeqSBAIJ*)A->data;
1090d9ca1df4SBarry Smith   const MatScalar *v       = a->a;
109149b5e25fSSatish Balay   PetscReal       sum_diag = 0.0, sum_off = 0.0, *sum;
1092d9ca1df4SBarry Smith   PetscInt        i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,jmin,jmax,nexti,ik,*jl,*il;
10936849ba73SBarry Smith   PetscErrorCode  ierr;
1094d9ca1df4SBarry Smith   const PetscInt  *aj=a->j,*col;
109549b5e25fSSatish Balay 
109649b5e25fSSatish Balay   PetscFunctionBegin;
109749b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
109849b5e25fSSatish Balay     for (k=0; k<mbs; k++) {
109949b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1100831a3094SHong Zhang       col  = aj + jmin;
1101831a3094SHong Zhang       if (*col == k) {         /* diagonal block */
110249b5e25fSSatish Balay         for (i=0; i<bs2; i++) {
110349b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
110449b5e25fSSatish Balay         }
1105831a3094SHong Zhang         jmin++;
1106831a3094SHong Zhang       }
1107831a3094SHong Zhang       for (j=jmin; j<jmax; j++) {  /* off-diagonal blocks */
110849b5e25fSSatish Balay         for (i=0; i<bs2; i++) {
110949b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
111049b5e25fSSatish Balay         }
111149b5e25fSSatish Balay       }
111249b5e25fSSatish Balay     }
11138f1a2a5eSBarry Smith     *norm = PetscSqrtReal(sum_diag + 2*sum_off);
1114*51f70360SJed Brown     ierr = PetscLogFlops(2*bs2*a->nz);CHKERRQ(ierr);
11150b8dc8d2SHong Zhang   } else if (type == NORM_INFINITY || type == NORM_1) {  /* maximum row/column sum */
1116dcca6d9dSJed Brown     ierr = PetscMalloc3(bs,&sum,mbs,&il,mbs,&jl);CHKERRQ(ierr);
11170b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11180b8dc8d2SHong Zhang     il[0] = 0;
111949b5e25fSSatish Balay 
112049b5e25fSSatish Balay     *norm = 0.0;
112149b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
112249b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
112349b5e25fSSatish Balay       /*-- col sum --*/
112449b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1125831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1126831a3094SHong Zhang                   at step k */
112749b5e25fSSatish Balay       while (i<mbs) {
112849b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
112949b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
113049b5e25fSSatish Balay         for (j=0; j<bs; j++) {
113149b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
113249b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
113349b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
113449b5e25fSSatish Balay           }
113549b5e25fSSatish Balay         }
113649b5e25fSSatish Balay         /* update il, jl */
1137831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1138831a3094SHong Zhang         jmax = a->i[i+1];
113949b5e25fSSatish Balay         if (jmin < jmax) {
114049b5e25fSSatish Balay           il[i] = jmin;
114149b5e25fSSatish Balay           j     = a->j[jmin];
114249b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
114349b5e25fSSatish Balay         }
114449b5e25fSSatish Balay         i = nexti;
114549b5e25fSSatish Balay       }
114649b5e25fSSatish Balay       /*-- row sum --*/
114749b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
114849b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
114949b5e25fSSatish Balay         for (j=0; j<bs; j++) {
115049b5e25fSSatish Balay           v = a->a + i*bs2 + j;
115149b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
11520b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
115349b5e25fSSatish Balay           }
115449b5e25fSSatish Balay         }
115549b5e25fSSatish Balay       }
115649b5e25fSSatish Balay       /* add k_th block row to il, jl */
1157831a3094SHong Zhang       col = aj+jmin;
1158831a3094SHong Zhang       if (*col == k) jmin++;
115949b5e25fSSatish Balay       if (jmin < jmax) {
116049b5e25fSSatish Balay         il[k] = jmin;
11610b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
116249b5e25fSSatish Balay       }
116349b5e25fSSatish Balay       for (j=0; j<bs; j++) {
116449b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
116549b5e25fSSatish Balay       }
116649b5e25fSSatish Balay     }
116774ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
1168*51f70360SJed Brown     ierr = PetscLogFlops(PetscMax(mbs*a->nz-1,0));CHKERRQ(ierr);
1169f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
117049b5e25fSSatish Balay   PetscFunctionReturn(0);
117149b5e25fSSatish Balay }
117249b5e25fSSatish Balay 
11734a2ae208SSatish Balay #undef __FUNCT__
11744a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1175ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
117649b5e25fSSatish Balay {
117749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data;
1178dfbe8321SBarry Smith   PetscErrorCode ierr;
117949b5e25fSSatish Balay 
118049b5e25fSSatish Balay   PetscFunctionBegin;
118149b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1182d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1183ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1184ef511fbeSHong Zhang     PetscFunctionReturn(0);
118549b5e25fSSatish Balay   }
118649b5e25fSSatish Balay 
118749b5e25fSSatish Balay   /* if the a->i are the same */
118813f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
118926fbe8dcSKarl Rupp   if (!*flg) PetscFunctionReturn(0);
119049b5e25fSSatish Balay 
119149b5e25fSSatish Balay   /* if a->j are the same */
119213f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
119326fbe8dcSKarl Rupp   if (!*flg) PetscFunctionReturn(0);
119426fbe8dcSKarl Rupp 
119549b5e25fSSatish Balay   /* if a->a are the same */
1196d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1197935af2e7SHong Zhang   PetscFunctionReturn(0);
119849b5e25fSSatish Balay }
119949b5e25fSSatish Balay 
12004a2ae208SSatish Balay #undef __FUNCT__
12014a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1202dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
120349b5e25fSSatish Balay {
120449b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1205dfbe8321SBarry Smith   PetscErrorCode  ierr;
1206d9ca1df4SBarry Smith   PetscInt        i,j,k,row,bs,ambs,bs2;
1207d9ca1df4SBarry Smith   const PetscInt  *ai,*aj;
120887828ca2SBarry Smith   PetscScalar     *x,zero = 0.0;
1209d9ca1df4SBarry Smith   const MatScalar *aa,*aa_j;
121049b5e25fSSatish Balay 
121149b5e25fSSatish Balay   PetscFunctionBegin;
1212d0f46423SBarry Smith   bs = A->rmap->bs;
1213e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
121482799104SHong Zhang 
121549b5e25fSSatish Balay   aa   = a->a;
12168a0c6efdSHong Zhang   ambs = a->mbs;
12178a0c6efdSHong Zhang 
12188a0c6efdSHong Zhang   if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) {
12198a0c6efdSHong Zhang     PetscInt *diag=a->diag;
12208a0c6efdSHong Zhang     aa   = a->a;
12218a0c6efdSHong Zhang     ambs = a->mbs;
12228a0c6efdSHong Zhang     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
12238a0c6efdSHong Zhang     for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
12248a0c6efdSHong Zhang     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
12258a0c6efdSHong Zhang     PetscFunctionReturn(0);
12268a0c6efdSHong Zhang   }
12278a0c6efdSHong Zhang 
122849b5e25fSSatish Balay   ai   = a->i;
122949b5e25fSSatish Balay   aj   = a->j;
123049b5e25fSSatish Balay   bs2  = a->bs2;
12312dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12321ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
123349b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
123449b5e25fSSatish Balay     j=ai[i];
123549b5e25fSSatish Balay     if (aj[j] == i) {    /* if this is a diagonal element */
123649b5e25fSSatish Balay       row  = i*bs;
123749b5e25fSSatish Balay       aa_j = aa + j*bs2;
123849b5e25fSSatish Balay       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
123949b5e25fSSatish Balay     }
124049b5e25fSSatish Balay   }
12411ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
124249b5e25fSSatish Balay   PetscFunctionReturn(0);
124349b5e25fSSatish Balay }
124449b5e25fSSatish Balay 
12454a2ae208SSatish Balay #undef __FUNCT__
12464a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1247dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
124849b5e25fSSatish Balay {
124949b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
1250d9ca1df4SBarry Smith   PetscScalar       x;
1251d9ca1df4SBarry Smith   const PetscScalar *l,*li,*ri;
125249b5e25fSSatish Balay   MatScalar         *aa,*v;
1253dfbe8321SBarry Smith   PetscErrorCode    ierr;
12545e90f9d9SHong Zhang   PetscInt          i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1255ace3abfcSBarry Smith   PetscBool         flg;
125649b5e25fSSatish Balay 
125749b5e25fSSatish Balay   PetscFunctionBegin;
1258b3bf805bSHong Zhang   if (ll != rr) {
1259b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1260e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1261b3bf805bSHong Zhang   }
1262b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
126349b5e25fSSatish Balay   ai  = a->i;
126449b5e25fSSatish Balay   aj  = a->j;
126549b5e25fSSatish Balay   aa  = a->a;
1266d0f46423SBarry Smith   m   = A->rmap->N;
1267d0f46423SBarry Smith   bs  = A->rmap->bs;
126849b5e25fSSatish Balay   mbs = a->mbs;
126949b5e25fSSatish Balay   bs2 = a->bs2;
127049b5e25fSSatish Balay 
1271d9ca1df4SBarry Smith   ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
127249b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1273e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
127449b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
127549b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
127649b5e25fSSatish Balay     li = l + i*bs;
127749b5e25fSSatish Balay     v  = aa + bs2*ai[i];
127849b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
127949b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
12805e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
128149b5e25fSSatish Balay         x = ri[k];
128249b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
128349b5e25fSSatish Balay       }
128449b5e25fSSatish Balay     }
128549b5e25fSSatish Balay   }
1286d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
1287dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
128849b5e25fSSatish Balay   PetscFunctionReturn(0);
128949b5e25fSSatish Balay }
129049b5e25fSSatish Balay 
12914a2ae208SSatish Balay #undef __FUNCT__
12924a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1293dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
129449b5e25fSSatish Balay {
129549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
129649b5e25fSSatish Balay 
129749b5e25fSSatish Balay   PetscFunctionBegin;
129849b5e25fSSatish Balay   info->block_size   = a->bs2;
1299ceed8ce5SJed Brown   info->nz_allocated = a->bs2*a->maxnz;   /*num. of nonzeros in upper triangular part */
13006c6c5352SBarry Smith   info->nz_used      = a->bs2*a->nz;   /*num. of nonzeros in upper triangular part */
130149b5e25fSSatish Balay   info->nz_unneeded  = (double)(info->nz_allocated - info->nz_used);
130249b5e25fSSatish Balay   info->assemblies   = A->num_ass;
13038e58a170SBarry Smith   info->mallocs      = A->info.mallocs;
13047adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1305d5f3da31SBarry Smith   if (A->factortype) {
130649b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
130749b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
130849b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
130949b5e25fSSatish Balay   } else {
131049b5e25fSSatish Balay     info->fill_ratio_given  = 0;
131149b5e25fSSatish Balay     info->fill_ratio_needed = 0;
131249b5e25fSSatish Balay     info->factor_mallocs    = 0;
131349b5e25fSSatish Balay   }
131449b5e25fSSatish Balay   PetscFunctionReturn(0);
131549b5e25fSSatish Balay }
131649b5e25fSSatish Balay 
131749b5e25fSSatish Balay 
13184a2ae208SSatish Balay #undef __FUNCT__
13194a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1320dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
132149b5e25fSSatish Balay {
132249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1323dfbe8321SBarry Smith   PetscErrorCode ierr;
132449b5e25fSSatish Balay 
132549b5e25fSSatish Balay   PetscFunctionBegin;
132649b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
132749b5e25fSSatish Balay   PetscFunctionReturn(0);
132849b5e25fSSatish Balay }
1329dc354874SHong Zhang 
13304a2ae208SSatish Balay #undef __FUNCT__
1331985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1332985db425SBarry Smith /*
1333985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1334985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1335985db425SBarry Smith */
1336985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1337dc354874SHong Zhang {
1338dc354874SHong Zhang   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1339dfbe8321SBarry Smith   PetscErrorCode  ierr;
1340d9ca1df4SBarry Smith   PetscInt        i,j,n,row,col,bs,mbs;
1341d9ca1df4SBarry Smith   const PetscInt  *ai,*aj;
1342c3fca9a7SHong Zhang   PetscReal       atmp;
1343d9ca1df4SBarry Smith   const MatScalar *aa;
1344985db425SBarry Smith   PetscScalar     *x;
134513f74950SBarry Smith   PetscInt        ncols,brow,bcol,krow,kcol;
1346dc354874SHong Zhang 
1347dc354874SHong Zhang   PetscFunctionBegin;
1348e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1349e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1350d0f46423SBarry Smith   bs  = A->rmap->bs;
1351dc354874SHong Zhang   aa  = a->a;
1352dc354874SHong Zhang   ai  = a->i;
1353dc354874SHong Zhang   aj  = a->j;
135444117c81SHong Zhang   mbs = a->mbs;
1355dc354874SHong Zhang 
1356985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
13571ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1358dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1359e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
136044117c81SHong Zhang   for (i=0; i<mbs; i++) {
1361d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1362d0f6400bSHong Zhang     brow  = bs*i;
136344117c81SHong Zhang     for (j=0; j<ncols; j++) {
1364d0f6400bSHong Zhang       bcol = bs*(*aj);
136544117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++) {
1366d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
136744117c81SHong Zhang         for (krow=0; krow<bs; krow++) {
1368d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1369d0f6400bSHong Zhang           row  = brow + krow;   /* row index */
1370c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1371c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
137244117c81SHong Zhang         }
137344117c81SHong Zhang       }
1374d0f6400bSHong Zhang       aj++;
1375dc354874SHong Zhang     }
1376dc354874SHong Zhang   }
13771ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1378dc354874SHong Zhang   PetscFunctionReturn(0);
1379dc354874SHong Zhang }
1380