xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 847daeccc32330bf60a24b181d91a4a68ad74642)
149b5e25fSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
3af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
4c6db04a5SJed Brown #include <petscbt.h>
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
6c6db04a5SJed Brown #include <petscblaslapack.h>
749b5e25fSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1149b5e25fSSatish Balay {
125eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
136849ba73SBarry Smith   PetscErrorCode ierr;
145d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
155d0c19d7SBarry Smith   const PetscInt *idx;
16db41cccfSHong Zhang   PetscBT        table_out,table_in;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19e32f2f54SBarry Smith   if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs  = a->mbs;
21d94109b8SHong Zhang   ai   = a->i;
22d94109b8SHong Zhang   aj   = a->j;
23d0f46423SBarry Smith   bs   = A->rmap->bs;
2453b8de81SBarry Smith   ierr = PetscBTCreate(mbs,&table_out);CHKERRQ(ierr);
25854ce69bSBarry Smith   ierr = PetscMalloc1(mbs+1,&nidx);CHKERRQ(ierr);
26854ce69bSBarry Smith   ierr = PetscMalloc1(A->rmap->N+1,&nidx2);CHKERRQ(ierr);
2753b8de81SBarry Smith   ierr = PetscBTCreate(mbs,&table_in);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31db41cccfSHong Zhang     ierr = PetscBTMemzero(mbs,table_out);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37db41cccfSHong Zhang     /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n; ++j) {
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42db41cccfSHong Zhang       if (!PetscBTLookupSet(table_out,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
486bf464f9SBarry Smith     ierr = ISDestroy(&is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++) { /* for each overlap */
52db41cccfSHong Zhang       /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53db41cccfSHong Zhang       ierr = PetscBTMemzero(mbs,table_in);CHKERRQ(ierr);
54db41cccfSHong Zhang       for (l=k; l<isz; l++) { ierr = PetscBTSet(table_in,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++) {
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59db41cccfSHong Zhang         if (PetscBTLookup(table_in,brow)) { /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end; l++) {
61d94109b8SHong Zhang             bcol = aj[l];
62d7b97159SHong Zhang             if (!PetscBTLookupSet(table_out,bcol)) {
63d7b97159SHong Zhang               nidx[isz++] = bcol;
64d7b97159SHong Zhang               if (bcol_max < bcol) bcol_max = bcol;
65d7b97159SHong Zhang             }
66d94109b8SHong Zhang           }
67d94109b8SHong Zhang           k++;
68d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
69d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
70d94109b8SHong Zhang           for (l = start; l<end; l++) {
71d94109b8SHong Zhang             bcol = aj[l];
72dbe03f88SHong Zhang             if (bcol > bcol_max) break;
73db41cccfSHong Zhang             if (PetscBTLookup(table_in,bcol)) {
7426fbe8dcSKarl Rupp               if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow;
75d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
76d94109b8SHong Zhang             }
77d94109b8SHong Zhang           }
78d94109b8SHong Zhang         }
79d94109b8SHong Zhang       }
80d94109b8SHong Zhang     } /* for each overlap */
81d94109b8SHong Zhang 
82d94109b8SHong Zhang     /* expand the Index Set */
83d94109b8SHong Zhang     for (j=0; j<isz; j++) {
8426fbe8dcSKarl Rupp       for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k;
85d94109b8SHong Zhang     }
8670b3c8c7SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
87d94109b8SHong Zhang   }
8894bacf5dSBarry Smith   ierr = PetscBTDestroy(&table_out);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
9194bacf5dSBarry Smith   ierr = PetscBTDestroy(&table_in);CHKERRQ(ierr);
925eee224dSHong Zhang   PetscFunctionReturn(0);
9349b5e25fSSatish Balay }
9449b5e25fSSatish Balay 
95*847daeccSHong Zhang /* Bseq is non-symmetric SBAIJ matrix, only used internally by PETSc.
96*847daeccSHong Zhang         Zero some ops' to avoid invalid usse */
974a2ae208SSatish Balay #undef __FUNCT__
98*847daeccSHong Zhang #define __FUNCT__ "MatSeqSBAIJZeroOps_Private"
99*847daeccSHong Zhang PetscErrorCode MatSeqSBAIJZeroOps_Private(Mat Bseq)
10049b5e25fSSatish Balay {
1016849ba73SBarry Smith   PetscErrorCode ierr;
10249b5e25fSSatish Balay 
10349b5e25fSSatish Balay   PetscFunctionBegin;
104*847daeccSHong Zhang   ierr = MatSetOption(Bseq,MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
105*847daeccSHong Zhang   Bseq->ops->mult                   = 0;
106*847daeccSHong Zhang   Bseq->ops->multadd                = 0;
107*847daeccSHong Zhang   Bseq->ops->multtranspose          = 0;
108*847daeccSHong Zhang   Bseq->ops->multtransposeadd       = 0;
109*847daeccSHong Zhang   Bseq->ops->lufactor               = 0;
110*847daeccSHong Zhang   Bseq->ops->choleskyfactor         = 0;
111*847daeccSHong Zhang   Bseq->ops->lufactorsymbolic       = 0;
112*847daeccSHong Zhang   Bseq->ops->choleskyfactorsymbolic = 0;
113*847daeccSHong Zhang   Bseq->ops->getinertia             = 0;
11449b5e25fSSatish Balay   PetscFunctionReturn(0);
11549b5e25fSSatish Balay }
11649b5e25fSSatish Balay 
1174a2ae208SSatish Balay #undef __FUNCT__
1184a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1194aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
12049b5e25fSSatish Balay {
1216849ba73SBarry Smith   PetscErrorCode ierr;
12249b5e25fSSatish Balay 
12349b5e25fSSatish Balay   PetscFunctionBegin;
124*847daeccSHong Zhang   ierr = MatGetSubMatrix_SeqBAIJ(A,isrow,iscol,scall,B);CHKERRQ(ierr);
125*847daeccSHong Zhang 
1268f46ffcaSHong Zhang   if (isrow != iscol) {
1278f46ffcaSHong Zhang     PetscBool isequal;
1288f46ffcaSHong Zhang     ierr = ISEqual(isrow,iscol,&isequal);CHKERRQ(ierr);
129*847daeccSHong Zhang     if (!isequal) {
130*847daeccSHong Zhang       ierr = MatSeqSBAIJZeroOps_Private(*B);CHKERRQ(ierr);
1318f46ffcaSHong Zhang     }
13249b5e25fSSatish Balay   }
13349b5e25fSSatish Balay   PetscFunctionReturn(0);
13449b5e25fSSatish Balay }
13549b5e25fSSatish Balay 
1364a2ae208SSatish Balay #undef __FUNCT__
1374a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
13813f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
13949b5e25fSSatish Balay {
1406849ba73SBarry Smith   PetscErrorCode ierr;
14113f74950SBarry Smith   PetscInt       i;
142afebec48SHong Zhang   PetscBool      flg;
14349b5e25fSSatish Balay 
14449b5e25fSSatish Balay   PetscFunctionBegin;
145afebec48SHong Zhang   ierr = MatGetSubMatrices_SeqBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
14649b5e25fSSatish Balay   for (i=0; i<n; i++) {
147afebec48SHong Zhang     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
148afebec48SHong Zhang     if (!flg) {
149*847daeccSHong Zhang       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
150afebec48SHong Zhang     }
15149b5e25fSSatish Balay   }
15249b5e25fSSatish Balay   PetscFunctionReturn(0);
15349b5e25fSSatish Balay }
15449b5e25fSSatish Balay 
15549b5e25fSSatish Balay /* -------------------------------------------------------*/
15649b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
15749b5e25fSSatish Balay /* -------------------------------------------------------*/
15849b5e25fSSatish Balay 
1594a2ae208SSatish Balay #undef __FUNCT__
1604a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
161dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
16249b5e25fSSatish Balay {
16349b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
164d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,zero=0.0;
165d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
166d9ca1df4SBarry Smith   const MatScalar   *v;
1676849ba73SBarry Smith   PetscErrorCode    ierr;
168d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
169d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
17098c9bda7SSatish Balay   PetscInt          nonzerorow=0;
17149b5e25fSSatish Balay 
17249b5e25fSSatish Balay   PetscFunctionBegin;
1732dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
174d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1751ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
17649b5e25fSSatish Balay 
17749b5e25fSSatish Balay   v  = a->a;
17849b5e25fSSatish Balay   xb = x;
17949b5e25fSSatish Balay 
18049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
18149b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
18249b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1];
18349b5e25fSSatish Balay     ib          = aj + *ai;
184831a3094SHong Zhang     jmin        = 0;
18598c9bda7SSatish Balay     nonzerorow += (n>0);
1867fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
18749b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
18849b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
189831a3094SHong Zhang       v        += 4; jmin++;
1907fbae186SHong Zhang     }
191444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
192444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
193831a3094SHong Zhang     for (j=jmin; j<n; j++) {
19449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
19549b5e25fSSatish Balay       cval       = ib[j]*2;
19649b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2;
19749b5e25fSSatish Balay       z[cval+1] += v[2]*x1 + v[3]*x2;
19849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
19949b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
20049b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
20149b5e25fSSatish Balay       v        += 4;
20249b5e25fSSatish Balay     }
20349b5e25fSSatish Balay     xb +=2; ai++;
20449b5e25fSSatish Balay   }
20549b5e25fSSatish Balay 
206d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2071ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
208dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
20949b5e25fSSatish Balay   PetscFunctionReturn(0);
21049b5e25fSSatish Balay }
21149b5e25fSSatish Balay 
2124a2ae208SSatish Balay #undef __FUNCT__
2134a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
214dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
21549b5e25fSSatish Balay {
21649b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
217d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,zero=0.0;
218d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
219d9ca1df4SBarry Smith   const MatScalar   *v;
2206849ba73SBarry Smith   PetscErrorCode    ierr;
221d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
222d9ca1df4SBarry Smith   const PetscInt    *aj = a->j,*ai = a->i,*ib;
22398c9bda7SSatish Balay   PetscInt          nonzerorow=0;
22449b5e25fSSatish Balay 
22549b5e25fSSatish Balay   PetscFunctionBegin;
2262dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
227d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2281ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
22949b5e25fSSatish Balay 
23049b5e25fSSatish Balay   v  = a->a;
23149b5e25fSSatish Balay   xb = x;
23249b5e25fSSatish Balay 
23349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
23449b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
23549b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
23649b5e25fSSatish Balay     ib          = aj + *ai;
237831a3094SHong Zhang     jmin        = 0;
23898c9bda7SSatish Balay     nonzerorow += (n>0);
2397fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
24049b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
24149b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
24249b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
243831a3094SHong Zhang       v        += 9; jmin++;
2447fbae186SHong Zhang     }
245444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
246444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
247831a3094SHong Zhang     for (j=jmin; j<n; j++) {
24849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
24949b5e25fSSatish Balay       cval       = ib[j]*3;
25049b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
25149b5e25fSSatish Balay       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
25249b5e25fSSatish Balay       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
25349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
25449b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
25549b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
25649b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
25749b5e25fSSatish Balay       v        += 9;
25849b5e25fSSatish Balay     }
25949b5e25fSSatish Balay     xb +=3; ai++;
26049b5e25fSSatish Balay   }
26149b5e25fSSatish Balay 
262d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2631ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
264dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
26549b5e25fSSatish Balay   PetscFunctionReturn(0);
26649b5e25fSSatish Balay }
26749b5e25fSSatish Balay 
2684a2ae208SSatish Balay #undef __FUNCT__
2694a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
270dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
27149b5e25fSSatish Balay {
27249b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
273d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,zero=0.0;
274d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
275d9ca1df4SBarry Smith   const MatScalar   *v;
2766849ba73SBarry Smith   PetscErrorCode    ierr;
277d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
278d9ca1df4SBarry Smith   const PetscInt    *aj = a->j,*ai = a->i,*ib;
27998c9bda7SSatish Balay   PetscInt          nonzerorow = 0;
28049b5e25fSSatish Balay 
28149b5e25fSSatish Balay   PetscFunctionBegin;
2822dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
283d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2841ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
28549b5e25fSSatish Balay 
28649b5e25fSSatish Balay   v  = a->a;
28749b5e25fSSatish Balay   xb = x;
28849b5e25fSSatish Balay 
28949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
29049b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
29149b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
29249b5e25fSSatish Balay     ib          = aj + *ai;
293831a3094SHong Zhang     jmin        = 0;
29498c9bda7SSatish Balay     nonzerorow += (n>0);
2957fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
29649b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
29749b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
29849b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
29949b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
300831a3094SHong Zhang       v        += 16; jmin++;
3017fbae186SHong Zhang     }
302444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
303444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
304831a3094SHong Zhang     for (j=jmin; j<n; j++) {
30549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
30649b5e25fSSatish Balay       cval       = ib[j]*4;
30749b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
30849b5e25fSSatish Balay       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
30949b5e25fSSatish Balay       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
31049b5e25fSSatish Balay       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
31149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
31249b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
31349b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
31449b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
31549b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
31649b5e25fSSatish Balay       v        += 16;
31749b5e25fSSatish Balay     }
31849b5e25fSSatish Balay     xb +=4; ai++;
31949b5e25fSSatish Balay   }
32049b5e25fSSatish Balay 
321d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3221ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
323dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
32449b5e25fSSatish Balay   PetscFunctionReturn(0);
32549b5e25fSSatish Balay }
32649b5e25fSSatish Balay 
3274a2ae208SSatish Balay #undef __FUNCT__
3284a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
329dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
33049b5e25fSSatish Balay {
33149b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
332d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,zero=0.0;
333d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
334d9ca1df4SBarry Smith   const MatScalar   *v;
3356849ba73SBarry Smith   PetscErrorCode    ierr;
336d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
337d9ca1df4SBarry Smith   const PetscInt    *aj = a->j,*ai = a->i,*ib;
33898c9bda7SSatish Balay   PetscInt          nonzerorow=0;
33949b5e25fSSatish Balay 
34049b5e25fSSatish Balay   PetscFunctionBegin;
3412dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
342d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34449b5e25fSSatish Balay 
34549b5e25fSSatish Balay   v  = a->a;
34649b5e25fSSatish Balay   xb = x;
34749b5e25fSSatish Balay 
34849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
34949b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
35049b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
35149b5e25fSSatish Balay     ib          = aj + *ai;
352831a3094SHong Zhang     jmin        = 0;
35398c9bda7SSatish Balay     nonzerorow += (n>0);
3547fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
35549b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
35649b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
35749b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
35849b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
35949b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
360831a3094SHong Zhang       v        += 25; jmin++;
3617fbae186SHong Zhang     }
362444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
363444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
364831a3094SHong Zhang     for (j=jmin; j<n; j++) {
36549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
36649b5e25fSSatish Balay       cval       = ib[j]*5;
36749b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
36849b5e25fSSatish Balay       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
36949b5e25fSSatish Balay       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
37049b5e25fSSatish Balay       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
37149b5e25fSSatish Balay       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
37249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
37349b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
37449b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
37549b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
37649b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
37749b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
37849b5e25fSSatish Balay       v        += 25;
37949b5e25fSSatish Balay     }
38049b5e25fSSatish Balay     xb +=5; ai++;
38149b5e25fSSatish Balay   }
38249b5e25fSSatish Balay 
383d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3841ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
385dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
38649b5e25fSSatish Balay   PetscFunctionReturn(0);
38749b5e25fSSatish Balay }
38849b5e25fSSatish Balay 
38949b5e25fSSatish Balay 
3904a2ae208SSatish Balay #undef __FUNCT__
3914a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
392dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
39349b5e25fSSatish Balay {
39449b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
395d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6,zero=0.0;
396d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
397d9ca1df4SBarry Smith   const MatScalar   *v;
3986849ba73SBarry Smith   PetscErrorCode    ierr;
399d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
400d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
40198c9bda7SSatish Balay   PetscInt          nonzerorow=0;
40249b5e25fSSatish Balay 
40349b5e25fSSatish Balay   PetscFunctionBegin;
4042dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
405d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4061ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
40749b5e25fSSatish Balay 
40849b5e25fSSatish Balay   v  = a->a;
40949b5e25fSSatish Balay   xb = x;
41049b5e25fSSatish Balay 
41149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41249b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
41349b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
41449b5e25fSSatish Balay     ib          = aj + *ai;
415831a3094SHong Zhang     jmin        = 0;
41698c9bda7SSatish Balay     nonzerorow += (n>0);
4177fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
41849b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
41949b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
42049b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
42149b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
42249b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
42349b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
424831a3094SHong Zhang       v        += 36; jmin++;
4257fbae186SHong Zhang     }
426444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
427444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
428831a3094SHong Zhang     for (j=jmin; j<n; j++) {
42949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
43049b5e25fSSatish Balay       cval       = ib[j]*6;
43149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
43249b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
43349b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
43449b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
43549b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
43649b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
43749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
43849b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
43949b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
44049b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
44149b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
44249b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
44349b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
44449b5e25fSSatish Balay       v        += 36;
44549b5e25fSSatish Balay     }
44649b5e25fSSatish Balay     xb +=6; ai++;
44749b5e25fSSatish Balay   }
44849b5e25fSSatish Balay 
449d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4501ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
451dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
45249b5e25fSSatish Balay   PetscFunctionReturn(0);
45349b5e25fSSatish Balay }
4544a2ae208SSatish Balay #undef __FUNCT__
4554a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
456dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
45749b5e25fSSatish Balay {
45849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
459d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
460d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
461d9ca1df4SBarry Smith   const MatScalar   *v;
4626849ba73SBarry Smith   PetscErrorCode    ierr;
463d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
464d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
46598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
46649b5e25fSSatish Balay 
46749b5e25fSSatish Balay   PetscFunctionBegin;
4682dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
469d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4701ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
47149b5e25fSSatish Balay 
47249b5e25fSSatish Balay   v  = a->a;
47349b5e25fSSatish Balay   xb = x;
47449b5e25fSSatish Balay 
47549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
47749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
47849b5e25fSSatish Balay     ib          = aj + *ai;
479831a3094SHong Zhang     jmin        = 0;
48098c9bda7SSatish Balay     nonzerorow += (n>0);
4817fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
48249b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
48349b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
48449b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
48549b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
48649b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
48749b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
48849b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
489831a3094SHong Zhang       v        += 49; jmin++;
4907fbae186SHong Zhang     }
491444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
492444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
493831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49549b5e25fSSatish Balay       cval       = ib[j]*7;
49649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
49749b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
49849b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
49949b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
50049b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
50149b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
50249b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
50349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50449b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
50549b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
50649b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
50749b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
50849b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
50949b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
51049b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
51149b5e25fSSatish Balay       v       += 49;
51249b5e25fSSatish Balay     }
51349b5e25fSSatish Balay     xb +=7; ai++;
51449b5e25fSSatish Balay   }
515d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5161ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
517dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
51849b5e25fSSatish Balay   PetscFunctionReturn(0);
51949b5e25fSSatish Balay }
52049b5e25fSSatish Balay 
52149b5e25fSSatish Balay /*
52249b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
52349b5e25fSSatish Balay */
5244a2ae208SSatish Balay #undef __FUNCT__
5254a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
526dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
52749b5e25fSSatish Balay {
52849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
529d9ca1df4SBarry Smith   PetscScalar       *z,*z_ptr,*zb,*work,*workt,zero=0.0;
530d9ca1df4SBarry Smith   const PetscScalar *x,*x_ptr,*xb;
531d9ca1df4SBarry Smith   const MatScalar   *v;
532dfbe8321SBarry Smith   PetscErrorCode    ierr;
533d9ca1df4SBarry Smith   PetscInt          mbs =a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
534d9ca1df4SBarry Smith   const PetscInt    *idx,*aj,*ii;
53598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
53649b5e25fSSatish Balay 
53749b5e25fSSatish Balay   PetscFunctionBegin;
5382dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
539d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);x_ptr = x;
5401ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
54149b5e25fSSatish Balay 
54249b5e25fSSatish Balay   aj = a->j;
54349b5e25fSSatish Balay   v  = a->a;
54449b5e25fSSatish Balay   ii = a->i;
54549b5e25fSSatish Balay 
54649b5e25fSSatish Balay   if (!a->mult_work) {
547854ce69bSBarry Smith     ierr = PetscMalloc1(A->rmap->N+1,&a->mult_work);CHKERRQ(ierr);
54849b5e25fSSatish Balay   }
54949b5e25fSSatish Balay   work = a->mult_work;
55049b5e25fSSatish Balay 
55149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
55249b5e25fSSatish Balay     n           = ii[1] - ii[0]; ncols = n*bs;
55349b5e25fSSatish Balay     workt       = work; idx=aj+ii[0];
55498c9bda7SSatish Balay     nonzerorow += (n>0);
55549b5e25fSSatish Balay 
55649b5e25fSSatish Balay     /* upper triangular part */
55749b5e25fSSatish Balay     for (j=0; j<n; j++) {
55849b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
55949b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
56049b5e25fSSatish Balay       workt += bs;
56149b5e25fSSatish Balay     }
56249b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
56396b95a6bSBarry Smith     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
56449b5e25fSSatish Balay 
56549b5e25fSSatish Balay     /* strict lower triangular part */
566831a3094SHong Zhang     idx = aj+ii[0];
567831a3094SHong Zhang     if (*idx == i) {
56896b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
569831a3094SHong Zhang     }
57096b9376eSHong Zhang 
57149b5e25fSSatish Balay     if (ncols > 0) {
57249b5e25fSSatish Balay       workt = work;
57387828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
57496b95a6bSBarry Smith       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
575831a3094SHong Zhang       for (j=0; j<n; j++) {
576831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
57749b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k];
57849b5e25fSSatish Balay         workt += bs;
57949b5e25fSSatish Balay       }
58049b5e25fSSatish Balay     }
58149b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
58249b5e25fSSatish Balay   }
58349b5e25fSSatish Balay 
584d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5851ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
586dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
58749b5e25fSSatish Balay   PetscFunctionReturn(0);
58849b5e25fSSatish Balay }
58949b5e25fSSatish Balay 
5904a2ae208SSatish Balay #undef __FUNCT__
5914a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
592dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
59349b5e25fSSatish Balay {
59449b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
595d9ca1df4SBarry Smith   PetscScalar       *z,x1;
596d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
597d9ca1df4SBarry Smith   const MatScalar   *v;
5986849ba73SBarry Smith   PetscErrorCode    ierr;
599d9ca1df4SBarry Smith   PetscInt          mbs =a->mbs,i,n,cval,j,jmin;
600d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
60198c9bda7SSatish Balay   PetscInt          nonzerorow=0;
60249b5e25fSSatish Balay 
60349b5e25fSSatish Balay   PetscFunctionBegin;
604343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
605d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6061ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
60749b5e25fSSatish Balay   v    = a->a;
60849b5e25fSSatish Balay   xb   = x;
60949b5e25fSSatish Balay 
61049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61149b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th row of A */
61249b5e25fSSatish Balay     x1          = xb[0];
61349b5e25fSSatish Balay     ib          = aj + *ai;
614831a3094SHong Zhang     jmin        = 0;
61598c9bda7SSatish Balay     nonzerorow += (n>0);
616831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
617831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
618831a3094SHong Zhang     }
619831a3094SHong Zhang     for (j=jmin; j<n; j++) {
62049b5e25fSSatish Balay       cval    = *ib;
62149b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
62249b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
62349b5e25fSSatish Balay     }
62449b5e25fSSatish Balay     xb++; ai++;
62549b5e25fSSatish Balay   }
62649b5e25fSSatish Balay 
627d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
628bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
62949b5e25fSSatish Balay 
630dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
63149b5e25fSSatish Balay   PetscFunctionReturn(0);
63249b5e25fSSatish Balay }
63349b5e25fSSatish Balay 
6344a2ae208SSatish Balay #undef __FUNCT__
6354a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
636dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
63749b5e25fSSatish Balay {
63849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
639d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2;
640d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
641d9ca1df4SBarry Smith   const MatScalar   *v;
6426849ba73SBarry Smith   PetscErrorCode    ierr;
643d9ca1df4SBarry Smith   PetscInt          mbs =a->mbs,i,n,cval,j,jmin;
644d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
64598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
64649b5e25fSSatish Balay 
64749b5e25fSSatish Balay   PetscFunctionBegin;
648343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
649d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
65149b5e25fSSatish Balay 
65249b5e25fSSatish Balay   v  = a->a;
65349b5e25fSSatish Balay   xb = x;
65449b5e25fSSatish Balay 
65549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
65649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
65749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1];
65849b5e25fSSatish Balay     ib          = aj + *ai;
659831a3094SHong Zhang     jmin        = 0;
66098c9bda7SSatish Balay     nonzerorow += (n>0);
6617fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
66249b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
66349b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
664831a3094SHong Zhang       v        += 4; jmin++;
6657fbae186SHong Zhang     }
666444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
667444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
668831a3094SHong Zhang     for (j=jmin; j<n; j++) {
66949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
67049b5e25fSSatish Balay       cval       = ib[j]*2;
67149b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2;
67249b5e25fSSatish Balay       z[cval+1] += v[2]*x1 + v[3]*x2;
67349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
67449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
67549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
67649b5e25fSSatish Balay       v        += 4;
67749b5e25fSSatish Balay     }
67849b5e25fSSatish Balay     xb +=2; ai++;
67949b5e25fSSatish Balay   }
680d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
681bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68249b5e25fSSatish Balay 
683dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
68449b5e25fSSatish Balay   PetscFunctionReturn(0);
68549b5e25fSSatish Balay }
68649b5e25fSSatish Balay 
6874a2ae208SSatish Balay #undef __FUNCT__
6884a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
689dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
69049b5e25fSSatish Balay {
69149b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
692d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3;
693d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
694d9ca1df4SBarry Smith   const MatScalar   *v;
6956849ba73SBarry Smith   PetscErrorCode    ierr;
696d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
697d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
69898c9bda7SSatish Balay   PetscInt          nonzerorow=0;
69949b5e25fSSatish Balay 
70049b5e25fSSatish Balay   PetscFunctionBegin;
701343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
702d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7031ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70449b5e25fSSatish Balay 
70549b5e25fSSatish Balay   v  = a->a;
70649b5e25fSSatish Balay   xb = x;
70749b5e25fSSatish Balay 
70849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70949b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
71049b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2];
71149b5e25fSSatish Balay     ib          = aj + *ai;
712831a3094SHong Zhang     jmin        = 0;
71398c9bda7SSatish Balay     nonzerorow += (n>0);
7147fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
71549b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
71649b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
71749b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
718831a3094SHong Zhang       v        += 9; jmin++;
7197fbae186SHong Zhang     }
720444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
721444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
722831a3094SHong Zhang     for (j=jmin; j<n; j++) {
72349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
72449b5e25fSSatish Balay       cval       = ib[j]*3;
72549b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3;
72649b5e25fSSatish Balay       z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
72749b5e25fSSatish Balay       z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
72849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
72949b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
73049b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
73149b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
73249b5e25fSSatish Balay       v        += 9;
73349b5e25fSSatish Balay     }
73449b5e25fSSatish Balay     xb +=3; ai++;
73549b5e25fSSatish Balay   }
73649b5e25fSSatish Balay 
737d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
738bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
73949b5e25fSSatish Balay 
740dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
74149b5e25fSSatish Balay   PetscFunctionReturn(0);
74249b5e25fSSatish Balay }
74349b5e25fSSatish Balay 
7444a2ae208SSatish Balay #undef __FUNCT__
7454a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
746dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
74749b5e25fSSatish Balay {
74849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
749d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4;
750d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
751d9ca1df4SBarry Smith   const MatScalar   *v;
7526849ba73SBarry Smith   PetscErrorCode    ierr;
753d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
754d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
75598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
75649b5e25fSSatish Balay 
75749b5e25fSSatish Balay   PetscFunctionBegin;
758343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
759d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7601ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
76149b5e25fSSatish Balay 
76249b5e25fSSatish Balay   v  = a->a;
76349b5e25fSSatish Balay   xb = x;
76449b5e25fSSatish Balay 
76549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
76649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
76749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
76849b5e25fSSatish Balay     ib          = aj + *ai;
769831a3094SHong Zhang     jmin        = 0;
77098c9bda7SSatish Balay     nonzerorow += (n>0);
7717fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
77249b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
77349b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
77449b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
77549b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
776831a3094SHong Zhang       v        += 16; jmin++;
7777fbae186SHong Zhang     }
778444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
779444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
780831a3094SHong Zhang     for (j=jmin; j<n; j++) {
78149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
78249b5e25fSSatish Balay       cval       = ib[j]*4;
78349b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
78449b5e25fSSatish Balay       z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
78549b5e25fSSatish Balay       z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
78649b5e25fSSatish Balay       z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
78749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
78849b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
78949b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
79049b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
79149b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
79249b5e25fSSatish Balay       v        += 16;
79349b5e25fSSatish Balay     }
79449b5e25fSSatish Balay     xb +=4; ai++;
79549b5e25fSSatish Balay   }
79649b5e25fSSatish Balay 
797d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
798bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
79949b5e25fSSatish Balay 
800dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
80149b5e25fSSatish Balay   PetscFunctionReturn(0);
80249b5e25fSSatish Balay }
80349b5e25fSSatish Balay 
8044a2ae208SSatish Balay #undef __FUNCT__
8054a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
806dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
80749b5e25fSSatish Balay {
80849b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
809d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5;
810d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
811d9ca1df4SBarry Smith   const MatScalar   *v;
8126849ba73SBarry Smith   PetscErrorCode    ierr;
813d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
814d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
81598c9bda7SSatish Balay   PetscInt          nonzerorow=0;
81649b5e25fSSatish Balay 
81749b5e25fSSatish Balay   PetscFunctionBegin;
818343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
819d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8201ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
82149b5e25fSSatish Balay 
82249b5e25fSSatish Balay   v  = a->a;
82349b5e25fSSatish Balay   xb = x;
82449b5e25fSSatish Balay 
82549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
82649b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
82749b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
82849b5e25fSSatish Balay     ib          = aj + *ai;
829831a3094SHong Zhang     jmin        = 0;
83098c9bda7SSatish Balay     nonzerorow += (n>0);
8317fbae186SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
83249b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
83349b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
83449b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
83549b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
83649b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
837831a3094SHong Zhang       v        += 25; jmin++;
8387fbae186SHong Zhang     }
839444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
840444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
841831a3094SHong Zhang     for (j=jmin; j<n; j++) {
84249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
84349b5e25fSSatish Balay       cval       = ib[j]*5;
84449b5e25fSSatish Balay       z[cval]   += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
84549b5e25fSSatish Balay       z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
84649b5e25fSSatish Balay       z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
84749b5e25fSSatish Balay       z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
84849b5e25fSSatish Balay       z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
84949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
85049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
85149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
85249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
85349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
85449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
85549b5e25fSSatish Balay       v        += 25;
85649b5e25fSSatish Balay     }
85749b5e25fSSatish Balay     xb +=5; ai++;
85849b5e25fSSatish Balay   }
85949b5e25fSSatish Balay 
860d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
861bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
86249b5e25fSSatish Balay 
863dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
86449b5e25fSSatish Balay   PetscFunctionReturn(0);
86549b5e25fSSatish Balay }
8664a2ae208SSatish Balay #undef __FUNCT__
8674a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
868dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
86949b5e25fSSatish Balay {
87049b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
871d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6;
872d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
873d9ca1df4SBarry Smith   const MatScalar   *v;
8746849ba73SBarry Smith   PetscErrorCode    ierr;
875d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
876d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
87798c9bda7SSatish Balay   PetscInt          nonzerorow=0;
87849b5e25fSSatish Balay 
87949b5e25fSSatish Balay   PetscFunctionBegin;
880343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
881d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8821ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
88349b5e25fSSatish Balay 
88449b5e25fSSatish Balay   v  = a->a;
88549b5e25fSSatish Balay   xb = x;
88649b5e25fSSatish Balay 
88749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
88849b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
88949b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
89049b5e25fSSatish Balay     ib          = aj + *ai;
891831a3094SHong Zhang     jmin        = 0;
89298c9bda7SSatish Balay     nonzerorow += (n>0);
8937fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
89449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
89549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
89649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
89749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
89849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
89949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
900831a3094SHong Zhang       v        += 36; jmin++;
9017fbae186SHong Zhang     }
902444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
903444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
904831a3094SHong Zhang     for (j=jmin; j<n; j++) {
90549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
90649b5e25fSSatish Balay       cval       = ib[j]*6;
90749b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
90849b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
90949b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
91049b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
91149b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
91249b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
91349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
91449b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
91549b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
91649b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
91749b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
91849b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
91949b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
92049b5e25fSSatish Balay       v        += 36;
92149b5e25fSSatish Balay     }
92249b5e25fSSatish Balay     xb +=6; ai++;
92349b5e25fSSatish Balay   }
92449b5e25fSSatish Balay 
925d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
926bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
92749b5e25fSSatish Balay 
928dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
92949b5e25fSSatish Balay   PetscFunctionReturn(0);
93049b5e25fSSatish Balay }
93149b5e25fSSatish Balay 
9324a2ae208SSatish Balay #undef __FUNCT__
9334a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
934dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
93549b5e25fSSatish Balay {
93649b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
937d9ca1df4SBarry Smith   PetscScalar       *z,x1,x2,x3,x4,x5,x6,x7;
938d9ca1df4SBarry Smith   const PetscScalar *x,*xb;
939d9ca1df4SBarry Smith   const MatScalar   *v;
9406849ba73SBarry Smith   PetscErrorCode    ierr;
941d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,n,cval,j,jmin;
942d9ca1df4SBarry Smith   const PetscInt    *aj=a->j,*ai=a->i,*ib;
94398c9bda7SSatish Balay   PetscInt          nonzerorow=0;
94449b5e25fSSatish Balay 
94549b5e25fSSatish Balay   PetscFunctionBegin;
946343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
947d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
94949b5e25fSSatish Balay 
95049b5e25fSSatish Balay   v  = a->a;
95149b5e25fSSatish Balay   xb = x;
95249b5e25fSSatish Balay 
95349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
95449b5e25fSSatish Balay     n           = ai[1] - ai[0]; /* length of i_th block row of A */
95549b5e25fSSatish Balay     x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
95649b5e25fSSatish Balay     ib          = aj + *ai;
957831a3094SHong Zhang     jmin        = 0;
95898c9bda7SSatish Balay     nonzerorow += (n>0);
9597fbae186SHong Zhang     if (*ib == i) {     /* (diag of A)*x */
96049b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
96149b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
96249b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
96349b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
96449b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
96549b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
96649b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
967831a3094SHong Zhang       v        += 49; jmin++;
9687fbae186SHong Zhang     }
969444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
970444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
971831a3094SHong Zhang     for (j=jmin; j<n; j++) {
97249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
97349b5e25fSSatish Balay       cval       = ib[j]*7;
97449b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
97549b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
97649b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
97749b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
97849b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
97949b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
98049b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
98149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
98249b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
98349b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
98449b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
98549b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
98649b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
98749b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
98849b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
98949b5e25fSSatish Balay       v       += 49;
99049b5e25fSSatish Balay     }
99149b5e25fSSatish Balay     xb +=7; ai++;
99249b5e25fSSatish Balay   }
99349b5e25fSSatish Balay 
994d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
995bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
99649b5e25fSSatish Balay 
997dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
99849b5e25fSSatish Balay   PetscFunctionReturn(0);
99949b5e25fSSatish Balay }
100049b5e25fSSatish Balay 
10014a2ae208SSatish Balay #undef __FUNCT__
10024a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1003dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
100449b5e25fSSatish Balay {
100549b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
1006d9ca1df4SBarry Smith   PetscScalar       *z,*z_ptr=0,*zb,*work,*workt;
1007d9ca1df4SBarry Smith   const PetscScalar *x,*x_ptr,*xb;
1008d9ca1df4SBarry Smith   const MatScalar   *v;
1009dfbe8321SBarry Smith   PetscErrorCode    ierr;
1010d9ca1df4SBarry Smith   PetscInt          mbs = a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1011d9ca1df4SBarry Smith   const PetscInt    *idx,*aj,*ii;
101298c9bda7SSatish Balay   PetscInt          nonzerorow=0;
101349b5e25fSSatish Balay 
101449b5e25fSSatish Balay   PetscFunctionBegin;
1015343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1016d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); x_ptr=x;
10171ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
101849b5e25fSSatish Balay 
101949b5e25fSSatish Balay   aj = a->j;
102049b5e25fSSatish Balay   v  = a->a;
102149b5e25fSSatish Balay   ii = a->i;
102249b5e25fSSatish Balay 
102349b5e25fSSatish Balay   if (!a->mult_work) {
1024854ce69bSBarry Smith     ierr = PetscMalloc1(A->rmap->n+1,&a->mult_work);CHKERRQ(ierr);
102549b5e25fSSatish Balay   }
102649b5e25fSSatish Balay   work = a->mult_work;
102749b5e25fSSatish Balay 
102849b5e25fSSatish Balay 
102949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
103049b5e25fSSatish Balay     n           = ii[1] - ii[0]; ncols = n*bs;
103149b5e25fSSatish Balay     workt       = work; idx=aj+ii[0];
103298c9bda7SSatish Balay     nonzerorow += (n>0);
103349b5e25fSSatish Balay 
103449b5e25fSSatish Balay     /* upper triangular part */
103549b5e25fSSatish Balay     for (j=0; j<n; j++) {
103649b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
103749b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
103849b5e25fSSatish Balay       workt += bs;
103949b5e25fSSatish Balay     }
104049b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
104196b95a6bSBarry Smith     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
104249b5e25fSSatish Balay 
104349b5e25fSSatish Balay     /* strict lower triangular part */
1044831a3094SHong Zhang     idx = aj+ii[0];
1045831a3094SHong Zhang     if (*idx == i) {
104696b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1047831a3094SHong Zhang     }
104849b5e25fSSatish Balay     if (ncols > 0) {
104949b5e25fSSatish Balay       workt = work;
105087828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
105196b95a6bSBarry Smith       PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1052831a3094SHong Zhang       for (j=0; j<n; j++) {
1053831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
105449b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k];
105549b5e25fSSatish Balay         workt += bs;
105649b5e25fSSatish Balay       }
105749b5e25fSSatish Balay     }
105849b5e25fSSatish Balay 
105949b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
106049b5e25fSSatish Balay   }
106149b5e25fSSatish Balay 
1062d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1063bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
106449b5e25fSSatish Balay 
1065dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
106649b5e25fSSatish Balay   PetscFunctionReturn(0);
106749b5e25fSSatish Balay }
106849b5e25fSSatish Balay 
10694a2ae208SSatish Balay #undef __FUNCT__
10704a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1071f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
107249b5e25fSSatish Balay {
107349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a     = (Mat_SeqSBAIJ*)inA->data;
1074f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1075efee365bSSatish Balay   PetscErrorCode ierr;
1076c5df96a5SBarry Smith   PetscBLASInt   one = 1,totalnz;
107749b5e25fSSatish Balay 
107849b5e25fSSatish Balay   PetscFunctionBegin;
1079c5df96a5SBarry Smith   ierr = PetscBLASIntCast(a->bs2*a->nz,&totalnz);CHKERRQ(ierr);
10808b83055fSJed Brown   PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one));
1081efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
108249b5e25fSSatish Balay   PetscFunctionReturn(0);
108349b5e25fSSatish Balay }
108449b5e25fSSatish Balay 
10854a2ae208SSatish Balay #undef __FUNCT__
10864a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1087dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
108849b5e25fSSatish Balay {
108949b5e25fSSatish Balay   Mat_SeqSBAIJ    *a       = (Mat_SeqSBAIJ*)A->data;
1090d9ca1df4SBarry Smith   const MatScalar *v       = a->a;
109149b5e25fSSatish Balay   PetscReal       sum_diag = 0.0, sum_off = 0.0, *sum;
1092d9ca1df4SBarry Smith   PetscInt        i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,jmin,jmax,nexti,ik,*jl,*il;
10936849ba73SBarry Smith   PetscErrorCode  ierr;
1094d9ca1df4SBarry Smith   const PetscInt  *aj=a->j,*col;
109549b5e25fSSatish Balay 
109649b5e25fSSatish Balay   PetscFunctionBegin;
109749b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
109849b5e25fSSatish Balay     for (k=0; k<mbs; k++) {
109949b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1100831a3094SHong Zhang       col  = aj + jmin;
1101831a3094SHong Zhang       if (*col == k) {         /* diagonal block */
110249b5e25fSSatish Balay         for (i=0; i<bs2; i++) {
110349b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
110449b5e25fSSatish Balay         }
1105831a3094SHong Zhang         jmin++;
1106831a3094SHong Zhang       }
1107831a3094SHong Zhang       for (j=jmin; j<jmax; j++) {  /* off-diagonal blocks */
110849b5e25fSSatish Balay         for (i=0; i<bs2; i++) {
110949b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
111049b5e25fSSatish Balay         }
111149b5e25fSSatish Balay       }
111249b5e25fSSatish Balay     }
11138f1a2a5eSBarry Smith     *norm = PetscSqrtReal(sum_diag + 2*sum_off);
11140b8dc8d2SHong Zhang   } else if (type == NORM_INFINITY || type == NORM_1) {  /* maximum row/column sum */
1115dcca6d9dSJed Brown     ierr = PetscMalloc3(bs,&sum,mbs,&il,mbs,&jl);CHKERRQ(ierr);
11160b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11170b8dc8d2SHong Zhang     il[0] = 0;
111849b5e25fSSatish Balay 
111949b5e25fSSatish Balay     *norm = 0.0;
112049b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
112149b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
112249b5e25fSSatish Balay       /*-- col sum --*/
112349b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1124831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1125831a3094SHong Zhang                   at step k */
112649b5e25fSSatish Balay       while (i<mbs) {
112749b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
112849b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
112949b5e25fSSatish Balay         for (j=0; j<bs; j++) {
113049b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
113149b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
113249b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
113349b5e25fSSatish Balay           }
113449b5e25fSSatish Balay         }
113549b5e25fSSatish Balay         /* update il, jl */
1136831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1137831a3094SHong Zhang         jmax = a->i[i+1];
113849b5e25fSSatish Balay         if (jmin < jmax) {
113949b5e25fSSatish Balay           il[i] = jmin;
114049b5e25fSSatish Balay           j     = a->j[jmin];
114149b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
114249b5e25fSSatish Balay         }
114349b5e25fSSatish Balay         i = nexti;
114449b5e25fSSatish Balay       }
114549b5e25fSSatish Balay       /*-- row sum --*/
114649b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
114749b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
114849b5e25fSSatish Balay         for (j=0; j<bs; j++) {
114949b5e25fSSatish Balay           v = a->a + i*bs2 + j;
115049b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
11510b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
115249b5e25fSSatish Balay           }
115349b5e25fSSatish Balay         }
115449b5e25fSSatish Balay       }
115549b5e25fSSatish Balay       /* add k_th block row to il, jl */
1156831a3094SHong Zhang       col = aj+jmin;
1157831a3094SHong Zhang       if (*col == k) jmin++;
115849b5e25fSSatish Balay       if (jmin < jmax) {
115949b5e25fSSatish Balay         il[k] = jmin;
11600b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
116149b5e25fSSatish Balay       }
116249b5e25fSSatish Balay       for (j=0; j<bs; j++) {
116349b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
116449b5e25fSSatish Balay       }
116549b5e25fSSatish Balay     }
116674ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
1167f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
116849b5e25fSSatish Balay   PetscFunctionReturn(0);
116949b5e25fSSatish Balay }
117049b5e25fSSatish Balay 
11714a2ae208SSatish Balay #undef __FUNCT__
11724a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1173ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
117449b5e25fSSatish Balay {
117549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data;
1176dfbe8321SBarry Smith   PetscErrorCode ierr;
117749b5e25fSSatish Balay 
117849b5e25fSSatish Balay   PetscFunctionBegin;
117949b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1180d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1181ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1182ef511fbeSHong Zhang     PetscFunctionReturn(0);
118349b5e25fSSatish Balay   }
118449b5e25fSSatish Balay 
118549b5e25fSSatish Balay   /* if the a->i are the same */
118613f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
118726fbe8dcSKarl Rupp   if (!*flg) PetscFunctionReturn(0);
118849b5e25fSSatish Balay 
118949b5e25fSSatish Balay   /* if a->j are the same */
119013f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
119126fbe8dcSKarl Rupp   if (!*flg) PetscFunctionReturn(0);
119226fbe8dcSKarl Rupp 
119349b5e25fSSatish Balay   /* if a->a are the same */
1194d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1195935af2e7SHong Zhang   PetscFunctionReturn(0);
119649b5e25fSSatish Balay }
119749b5e25fSSatish Balay 
11984a2ae208SSatish Balay #undef __FUNCT__
11994a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1200dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
120149b5e25fSSatish Balay {
120249b5e25fSSatish Balay   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1203dfbe8321SBarry Smith   PetscErrorCode  ierr;
1204d9ca1df4SBarry Smith   PetscInt        i,j,k,row,bs,ambs,bs2;
1205d9ca1df4SBarry Smith   const PetscInt  *ai,*aj;
120687828ca2SBarry Smith   PetscScalar     *x,zero = 0.0;
1207d9ca1df4SBarry Smith   const MatScalar *aa,*aa_j;
120849b5e25fSSatish Balay 
120949b5e25fSSatish Balay   PetscFunctionBegin;
1210d0f46423SBarry Smith   bs = A->rmap->bs;
1211e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
121282799104SHong Zhang 
121349b5e25fSSatish Balay   aa   = a->a;
12148a0c6efdSHong Zhang   ambs = a->mbs;
12158a0c6efdSHong Zhang 
12168a0c6efdSHong Zhang   if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) {
12178a0c6efdSHong Zhang     PetscInt *diag=a->diag;
12188a0c6efdSHong Zhang     aa   = a->a;
12198a0c6efdSHong Zhang     ambs = a->mbs;
12208a0c6efdSHong Zhang     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
12218a0c6efdSHong Zhang     for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
12228a0c6efdSHong Zhang     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
12238a0c6efdSHong Zhang     PetscFunctionReturn(0);
12248a0c6efdSHong Zhang   }
12258a0c6efdSHong Zhang 
122649b5e25fSSatish Balay   ai   = a->i;
122749b5e25fSSatish Balay   aj   = a->j;
122849b5e25fSSatish Balay   bs2  = a->bs2;
12292dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12301ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
123149b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
123249b5e25fSSatish Balay     j=ai[i];
123349b5e25fSSatish Balay     if (aj[j] == i) {    /* if this is a diagonal element */
123449b5e25fSSatish Balay       row  = i*bs;
123549b5e25fSSatish Balay       aa_j = aa + j*bs2;
123649b5e25fSSatish Balay       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
123749b5e25fSSatish Balay     }
123849b5e25fSSatish Balay   }
12391ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
124049b5e25fSSatish Balay   PetscFunctionReturn(0);
124149b5e25fSSatish Balay }
124249b5e25fSSatish Balay 
12434a2ae208SSatish Balay #undef __FUNCT__
12444a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1245dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
124649b5e25fSSatish Balay {
124749b5e25fSSatish Balay   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
1248d9ca1df4SBarry Smith   PetscScalar       x;
1249d9ca1df4SBarry Smith   const PetscScalar *l,*li,*ri;
125049b5e25fSSatish Balay   MatScalar         *aa,*v;
1251dfbe8321SBarry Smith   PetscErrorCode    ierr;
12525e90f9d9SHong Zhang   PetscInt          i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1253ace3abfcSBarry Smith   PetscBool         flg;
125449b5e25fSSatish Balay 
125549b5e25fSSatish Balay   PetscFunctionBegin;
1256b3bf805bSHong Zhang   if (ll != rr) {
1257b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1258e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1259b3bf805bSHong Zhang   }
1260b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
126149b5e25fSSatish Balay   ai  = a->i;
126249b5e25fSSatish Balay   aj  = a->j;
126349b5e25fSSatish Balay   aa  = a->a;
1264d0f46423SBarry Smith   m   = A->rmap->N;
1265d0f46423SBarry Smith   bs  = A->rmap->bs;
126649b5e25fSSatish Balay   mbs = a->mbs;
126749b5e25fSSatish Balay   bs2 = a->bs2;
126849b5e25fSSatish Balay 
1269d9ca1df4SBarry Smith   ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
127049b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1271e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
127249b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
127349b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
127449b5e25fSSatish Balay     li = l + i*bs;
127549b5e25fSSatish Balay     v  = aa + bs2*ai[i];
127649b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
127749b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
12785e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
127949b5e25fSSatish Balay         x = ri[k];
128049b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
128149b5e25fSSatish Balay       }
128249b5e25fSSatish Balay     }
128349b5e25fSSatish Balay   }
1284d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
1285dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
128649b5e25fSSatish Balay   PetscFunctionReturn(0);
128749b5e25fSSatish Balay }
128849b5e25fSSatish Balay 
12894a2ae208SSatish Balay #undef __FUNCT__
12904a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1291dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
129249b5e25fSSatish Balay {
129349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
129449b5e25fSSatish Balay 
129549b5e25fSSatish Balay   PetscFunctionBegin;
129649b5e25fSSatish Balay   info->block_size   = a->bs2;
1297ceed8ce5SJed Brown   info->nz_allocated = a->bs2*a->maxnz;   /*num. of nonzeros in upper triangular part */
12986c6c5352SBarry Smith   info->nz_used      = a->bs2*a->nz;   /*num. of nonzeros in upper triangular part */
129949b5e25fSSatish Balay   info->nz_unneeded  = (double)(info->nz_allocated - info->nz_used);
130049b5e25fSSatish Balay   info->assemblies   = A->num_ass;
13018e58a170SBarry Smith   info->mallocs      = A->info.mallocs;
13027adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1303d5f3da31SBarry Smith   if (A->factortype) {
130449b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
130549b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
130649b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
130749b5e25fSSatish Balay   } else {
130849b5e25fSSatish Balay     info->fill_ratio_given  = 0;
130949b5e25fSSatish Balay     info->fill_ratio_needed = 0;
131049b5e25fSSatish Balay     info->factor_mallocs    = 0;
131149b5e25fSSatish Balay   }
131249b5e25fSSatish Balay   PetscFunctionReturn(0);
131349b5e25fSSatish Balay }
131449b5e25fSSatish Balay 
131549b5e25fSSatish Balay 
13164a2ae208SSatish Balay #undef __FUNCT__
13174a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1318dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
131949b5e25fSSatish Balay {
132049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1321dfbe8321SBarry Smith   PetscErrorCode ierr;
132249b5e25fSSatish Balay 
132349b5e25fSSatish Balay   PetscFunctionBegin;
132449b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
132549b5e25fSSatish Balay   PetscFunctionReturn(0);
132649b5e25fSSatish Balay }
1327dc354874SHong Zhang 
13284a2ae208SSatish Balay #undef __FUNCT__
1329985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1330985db425SBarry Smith /*
1331985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1332985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1333985db425SBarry Smith */
1334985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1335dc354874SHong Zhang {
1336dc354874SHong Zhang   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
1337dfbe8321SBarry Smith   PetscErrorCode  ierr;
1338d9ca1df4SBarry Smith   PetscInt        i,j,n,row,col,bs,mbs;
1339d9ca1df4SBarry Smith   const PetscInt  *ai,*aj;
1340c3fca9a7SHong Zhang   PetscReal       atmp;
1341d9ca1df4SBarry Smith   const MatScalar *aa;
1342985db425SBarry Smith   PetscScalar     *x;
134313f74950SBarry Smith   PetscInt        ncols,brow,bcol,krow,kcol;
1344dc354874SHong Zhang 
1345dc354874SHong Zhang   PetscFunctionBegin;
1346e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1347e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1348d0f46423SBarry Smith   bs  = A->rmap->bs;
1349dc354874SHong Zhang   aa  = a->a;
1350dc354874SHong Zhang   ai  = a->i;
1351dc354874SHong Zhang   aj  = a->j;
135244117c81SHong Zhang   mbs = a->mbs;
1353dc354874SHong Zhang 
1354985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
13551ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1356dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1357e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
135844117c81SHong Zhang   for (i=0; i<mbs; i++) {
1359d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1360d0f6400bSHong Zhang     brow  = bs*i;
136144117c81SHong Zhang     for (j=0; j<ncols; j++) {
1362d0f6400bSHong Zhang       bcol = bs*(*aj);
136344117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++) {
1364d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
136544117c81SHong Zhang         for (krow=0; krow<bs; krow++) {
1366d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1367d0f6400bSHong Zhang           row  = brow + krow;   /* row index */
1368c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1369c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
137044117c81SHong Zhang         }
137144117c81SHong Zhang       }
1372d0f6400bSHong Zhang       aj++;
1373dc354874SHong Zhang     }
1374dc354874SHong Zhang   }
13751ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1376dc354874SHong Zhang   PetscFunctionReturn(0);
1377dc354874SHong Zhang }
1378