xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 8a0c6efd93b44e688e9d4a638634e73ed5f232de)
149b5e25fSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
3c6db04a5SJed Brown #include <../src/mat/blockinvert.h>
4c6db04a5SJed Brown #include <petscbt.h>
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
6c6db04a5SJed Brown #include <petscblaslapack.h>
749b5e25fSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1149b5e25fSSatish Balay {
125eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
136849ba73SBarry Smith   PetscErrorCode ierr;
145d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
155d0c19d7SBarry Smith   const PetscInt *idx;
16db41cccfSHong Zhang   PetscBT        table_out,table_in;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19e32f2f54SBarry Smith   if (ov < 0)  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23d0f46423SBarry Smith   bs  = A->rmap->bs;
24db41cccfSHong Zhang   ierr = PetscBTCreate(mbs,table_out);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
27db41cccfSHong Zhang   ierr = PetscBTCreate(mbs,table_in);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31db41cccfSHong Zhang     ierr = PetscBTMemzero(mbs,table_out);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37db41cccfSHong Zhang     /* Enter these into the temp arrays i.e mark table_out[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n ; ++j){
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41e32f2f54SBarry Smith       if (brow >= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42db41cccfSHong Zhang       if(!PetscBTLookupSet(table_out,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
486bf464f9SBarry Smith     ierr = ISDestroy(&is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
52db41cccfSHong Zhang       /* set table_in for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53db41cccfSHong Zhang       ierr = PetscBTMemzero(mbs,table_in);CHKERRQ(ierr);
54db41cccfSHong Zhang       for (l=k; l<isz; l++) { ierr = PetscBTSet(table_in,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59db41cccfSHong Zhang         if (PetscBTLookup(table_in,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end ; l++){
61d94109b8SHong Zhang             bcol = aj[l];
62d7b97159SHong Zhang             if (!PetscBTLookupSet(table_out,bcol)) {
63d7b97159SHong Zhang               nidx[isz++] = bcol;
64d7b97159SHong Zhang               if (bcol_max < bcol) bcol_max = bcol;
65d7b97159SHong Zhang             }
66d94109b8SHong Zhang           }
67d94109b8SHong Zhang           k++;
68d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
69d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
70d94109b8SHong Zhang           for (l = start; l<end ; l++){
71d94109b8SHong Zhang             bcol = aj[l];
72dbe03f88SHong Zhang             if (bcol > bcol_max) break;
73db41cccfSHong Zhang             if (PetscBTLookup(table_in,bcol)){
74db41cccfSHong Zhang               if (!PetscBTLookupSet(table_out,brow)) {nidx[isz++] = brow;}
75d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
76d94109b8SHong Zhang             }
77d94109b8SHong Zhang           }
78d94109b8SHong Zhang         }
79d94109b8SHong Zhang       }
80d94109b8SHong Zhang     } /* for each overlap */
81d94109b8SHong Zhang 
82d94109b8SHong Zhang     /* expand the Index Set */
83d94109b8SHong Zhang     for (j=0; j<isz; j++) {
84d94109b8SHong Zhang       for (k=0; k<bs; k++)
85d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
86d94109b8SHong Zhang     }
8770b3c8c7SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
88d94109b8SHong Zhang   }
89db41cccfSHong Zhang   ierr = PetscBTDestroy(table_out);CHKERRQ(ierr);
90d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
91d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
92db41cccfSHong Zhang   ierr = PetscBTDestroy(table_in);CHKERRQ(ierr);
935eee224dSHong Zhang   PetscFunctionReturn(0);
9449b5e25fSSatish Balay }
9549b5e25fSSatish Balay 
964a2ae208SSatish Balay #undef __FUNCT__
974a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
984aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9949b5e25fSSatish Balay {
10049b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
1016849ba73SBarry Smith   PetscErrorCode ierr;
10213f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10313f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1045d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1055d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10649b5e25fSSatish Balay   MatScalar      *mat_a;
10749b5e25fSSatish Balay   Mat            C;
108ace3abfcSBarry Smith   PetscBool      flag,sorted;
10949b5e25fSSatish Balay 
11049b5e25fSSatish Balay   PetscFunctionBegin;
111e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
11214ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
113e32f2f54SBarry Smith   if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11449b5e25fSSatish Balay 
11549b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11649b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11749b5e25fSSatish Balay 
11874ed9c26SBarry Smith   ierr  = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11974ed9c26SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
12049b5e25fSSatish Balay   ssmap = smap;
12113f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
12249b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12349b5e25fSSatish Balay   /* determine lens of each row */
12449b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12549b5e25fSSatish Balay     kstart  = ai[irow[i]];
12649b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12749b5e25fSSatish Balay     lens[i] = 0;
12849b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12949b5e25fSSatish Balay         if (ssmap[aj[k]]) {
13049b5e25fSSatish Balay           lens[i]++;
13149b5e25fSSatish Balay         }
13249b5e25fSSatish Balay       }
13349b5e25fSSatish Balay     }
13449b5e25fSSatish Balay   /* Create and fill new matrix */
13549b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13649b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13749b5e25fSSatish Balay 
138e32f2f54SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13913f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
140e7e72b3dSBarry Smith     if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
14113f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14249b5e25fSSatish Balay     C = *B;
14349b5e25fSSatish Balay   } else {
1447adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
145f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1467adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
147ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14849b5e25fSSatish Balay   }
14949b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
15049b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15149b5e25fSSatish Balay     row    = irow[i];
15249b5e25fSSatish Balay     kstart = ai[row];
15349b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15449b5e25fSSatish Balay     mat_i  = c->i[i];
15549b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15649b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15749b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15849b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15949b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
16049b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16149b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16249b5e25fSSatish Balay         mat_a   += bs2;
16349b5e25fSSatish Balay         (*mat_ilen)++;
16449b5e25fSSatish Balay       }
16549b5e25fSSatish Balay     }
16649b5e25fSSatish Balay   }
16749b5e25fSSatish Balay 
16849b5e25fSSatish Balay   /* Free work space */
16949b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17349b5e25fSSatish Balay 
17449b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17549b5e25fSSatish Balay   *B = C;
17649b5e25fSSatish Balay   PetscFunctionReturn(0);
17749b5e25fSSatish Balay }
17849b5e25fSSatish Balay 
1794a2ae208SSatish Balay #undef __FUNCT__
1804a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1814aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
18249b5e25fSSatish Balay {
18349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18449b5e25fSSatish Balay   IS             is1;
1856849ba73SBarry Smith   PetscErrorCode ierr;
1865d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1875d0c19d7SBarry Smith   const PetscInt *irow;
18849b5e25fSSatish Balay 
18949b5e25fSSatish Balay   PetscFunctionBegin;
190e32f2f54SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
19149b5e25fSSatish Balay 
19249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19449b5e25fSSatish Balay 
19549b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19649b5e25fSSatish Balay    and form the IS with compressed IS */
19774ed9c26SBarry Smith   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
19874ed9c26SBarry Smith   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
19949b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
20049b5e25fSSatish Balay 
20149b5e25fSSatish Balay   count = 0;
20249b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
203e32f2f54SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20449b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20549b5e25fSSatish Balay   }
20649b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20770b3c8c7SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
20874ed9c26SBarry Smith   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
20949b5e25fSSatish Balay 
2104aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
2116bf464f9SBarry Smith   ierr = ISDestroy(&is1);CHKERRQ(ierr);
21249b5e25fSSatish Balay   PetscFunctionReturn(0);
21349b5e25fSSatish Balay }
21449b5e25fSSatish Balay 
2154a2ae208SSatish Balay #undef __FUNCT__
2164a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21713f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21849b5e25fSSatish Balay {
2196849ba73SBarry Smith   PetscErrorCode ierr;
22013f74950SBarry Smith   PetscInt       i;
22149b5e25fSSatish Balay 
22249b5e25fSSatish Balay   PetscFunctionBegin;
22349b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22482502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22549b5e25fSSatish Balay   }
22649b5e25fSSatish Balay 
22749b5e25fSSatish Balay   for (i=0; i<n; i++) {
2284aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
22949b5e25fSSatish Balay   }
23049b5e25fSSatish Balay   PetscFunctionReturn(0);
23149b5e25fSSatish Balay }
23249b5e25fSSatish Balay 
23349b5e25fSSatish Balay /* -------------------------------------------------------*/
23449b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23549b5e25fSSatish Balay /* -------------------------------------------------------*/
23649b5e25fSSatish Balay 
2374a2ae208SSatish Balay #undef __FUNCT__
2384a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
239dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
24049b5e25fSSatish Balay {
24149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
24349b5e25fSSatish Balay   MatScalar      *v;
2446849ba73SBarry Smith   PetscErrorCode ierr;
24513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24749b5e25fSSatish Balay 
24849b5e25fSSatish Balay   PetscFunctionBegin;
2492dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25249b5e25fSSatish Balay 
25349b5e25fSSatish Balay   v     = a->a;
25449b5e25fSSatish Balay   xb = x;
25549b5e25fSSatish Balay 
25649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
25849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
25949b5e25fSSatish Balay     ib = aj + *ai;
260831a3094SHong Zhang     jmin = 0;
26198c9bda7SSatish Balay     nonzerorow += (n>0);
2627fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
26349b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
26449b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
265831a3094SHong Zhang       v += 4; jmin++;
2667fbae186SHong Zhang     }
267444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
268444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
269831a3094SHong Zhang     for (j=jmin; j<n; j++) {
27049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
27149b5e25fSSatish Balay       cval       = ib[j]*2;
27249b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
27349b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
27449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
27549b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
27649b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
27749b5e25fSSatish Balay       v  += 4;
27849b5e25fSSatish Balay     }
27949b5e25fSSatish Balay     xb +=2; ai++;
28049b5e25fSSatish Balay   }
28149b5e25fSSatish Balay 
2821ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2831ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
284dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
28549b5e25fSSatish Balay   PetscFunctionReturn(0);
28649b5e25fSSatish Balay }
28749b5e25fSSatish Balay 
2884a2ae208SSatish Balay #undef __FUNCT__
2894a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
290dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
29149b5e25fSSatish Balay {
29249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
29387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
29449b5e25fSSatish Balay   MatScalar      *v;
2956849ba73SBarry Smith   PetscErrorCode ierr;
29613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29849b5e25fSSatish Balay 
29949b5e25fSSatish Balay   PetscFunctionBegin;
3002dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3011ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3021ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
30349b5e25fSSatish Balay 
30449b5e25fSSatish Balay   v    = a->a;
30549b5e25fSSatish Balay   xb   = x;
30649b5e25fSSatish Balay 
30749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
31049b5e25fSSatish Balay     ib = aj + *ai;
311831a3094SHong Zhang     jmin = 0;
31298c9bda7SSatish Balay     nonzerorow += (n>0);
3137fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
31449b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
31549b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
31649b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
317831a3094SHong Zhang       v += 9; jmin++;
3187fbae186SHong Zhang     }
319444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
320444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
321831a3094SHong Zhang     for (j=jmin; j<n; j++) {
32249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
32349b5e25fSSatish Balay       cval       = ib[j]*3;
32449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
32549b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
32649b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
32749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32849b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
32949b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
33049b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
33149b5e25fSSatish Balay       v  += 9;
33249b5e25fSSatish Balay     }
33349b5e25fSSatish Balay     xb +=3; ai++;
33449b5e25fSSatish Balay   }
33549b5e25fSSatish Balay 
3361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3371ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
338dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33949b5e25fSSatish Balay   PetscFunctionReturn(0);
34049b5e25fSSatish Balay }
34149b5e25fSSatish Balay 
3424a2ae208SSatish Balay #undef __FUNCT__
3434a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
344dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
34549b5e25fSSatish Balay {
34649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
34787828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
34849b5e25fSSatish Balay   MatScalar      *v;
3496849ba73SBarry Smith   PetscErrorCode ierr;
35013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
35198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
35249b5e25fSSatish Balay 
35349b5e25fSSatish Balay   PetscFunctionBegin;
3542dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3551ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3561ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35749b5e25fSSatish Balay 
35849b5e25fSSatish Balay   v     = a->a;
35949b5e25fSSatish Balay   xb = x;
36049b5e25fSSatish Balay 
36149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
36249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
36349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
36449b5e25fSSatish Balay     ib = aj + *ai;
365831a3094SHong Zhang     jmin = 0;
36698c9bda7SSatish Balay     nonzerorow += (n>0);
3677fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
36849b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36949b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
37049b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
37149b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
372831a3094SHong Zhang       v += 16; jmin++;
3737fbae186SHong Zhang     }
374444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
375444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
376831a3094SHong Zhang     for (j=jmin; j<n; j++) {
37749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37849b5e25fSSatish Balay       cval       = ib[j]*4;
37949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
38049b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
38149b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
38249b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
38349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
38449b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
38549b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
38649b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
38749b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38849b5e25fSSatish Balay       v  += 16;
38949b5e25fSSatish Balay     }
39049b5e25fSSatish Balay     xb +=4; ai++;
39149b5e25fSSatish Balay   }
39249b5e25fSSatish Balay 
3931ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3941ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
395dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
39649b5e25fSSatish Balay   PetscFunctionReturn(0);
39749b5e25fSSatish Balay }
39849b5e25fSSatish Balay 
3994a2ae208SSatish Balay #undef __FUNCT__
4004a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
401dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
40249b5e25fSSatish Balay {
40349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
40487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
40549b5e25fSSatish Balay   MatScalar      *v;
4066849ba73SBarry Smith   PetscErrorCode ierr;
40713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
40898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
40949b5e25fSSatish Balay 
41049b5e25fSSatish Balay   PetscFunctionBegin;
4112dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4121ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4131ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
41449b5e25fSSatish Balay 
41549b5e25fSSatish Balay   v     = a->a;
41649b5e25fSSatish Balay   xb = x;
41749b5e25fSSatish Balay 
41849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
42049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
42149b5e25fSSatish Balay     ib = aj + *ai;
422831a3094SHong Zhang     jmin = 0;
42398c9bda7SSatish Balay     nonzerorow += (n>0);
4247fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
42549b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
42649b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
42749b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
42849b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42949b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
430831a3094SHong Zhang       v += 25; jmin++;
4317fbae186SHong Zhang     }
432444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
433444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
434831a3094SHong Zhang     for (j=jmin; j<n; j++) {
43549b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
43649b5e25fSSatish Balay       cval       = ib[j]*5;
43749b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
43849b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
43949b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
44049b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
44149b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
44249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
44349b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
44449b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
44549b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
44649b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
44749b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
44849b5e25fSSatish Balay       v  += 25;
44949b5e25fSSatish Balay     }
45049b5e25fSSatish Balay     xb +=5; ai++;
45149b5e25fSSatish Balay   }
45249b5e25fSSatish Balay 
4531ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4541ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
455dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
45649b5e25fSSatish Balay   PetscFunctionReturn(0);
45749b5e25fSSatish Balay }
45849b5e25fSSatish Balay 
45949b5e25fSSatish Balay 
4604a2ae208SSatish Balay #undef __FUNCT__
4614a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
462dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
46349b5e25fSSatish Balay {
46449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
46587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
46649b5e25fSSatish Balay   MatScalar      *v;
4676849ba73SBarry Smith   PetscErrorCode ierr;
46813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
46998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
47049b5e25fSSatish Balay 
47149b5e25fSSatish Balay   PetscFunctionBegin;
4722dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4731ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4741ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
47549b5e25fSSatish Balay 
47649b5e25fSSatish Balay   v     = a->a;
47749b5e25fSSatish Balay   xb = x;
47849b5e25fSSatish Balay 
47949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
48049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
48149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
48249b5e25fSSatish Balay     ib = aj + *ai;
483831a3094SHong Zhang     jmin = 0;
48498c9bda7SSatish Balay     nonzerorow += (n>0);
4857fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
48649b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
48749b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
48849b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
48949b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
49049b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
49149b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
492831a3094SHong Zhang       v += 36; jmin++;
4937fbae186SHong Zhang     }
494444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
495444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
496831a3094SHong Zhang     for (j=jmin; j<n; j++) {
49749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
49849b5e25fSSatish Balay       cval       = ib[j]*6;
49949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
50049b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
50149b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
50249b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
50349b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
50449b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
50549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
50649b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
50749b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
50849b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
50949b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
51049b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
51149b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
51249b5e25fSSatish Balay       v  += 36;
51349b5e25fSSatish Balay     }
51449b5e25fSSatish Balay     xb +=6; ai++;
51549b5e25fSSatish Balay   }
51649b5e25fSSatish Balay 
5171ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
519dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
52049b5e25fSSatish Balay   PetscFunctionReturn(0);
52149b5e25fSSatish Balay }
5224a2ae208SSatish Balay #undef __FUNCT__
5234a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
524dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
52549b5e25fSSatish Balay {
52649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
52787828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
52849b5e25fSSatish Balay   MatScalar      *v;
5296849ba73SBarry Smith   PetscErrorCode ierr;
53013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
53198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
53249b5e25fSSatish Balay 
53349b5e25fSSatish Balay   PetscFunctionBegin;
5342dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5351ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5361ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
53749b5e25fSSatish Balay 
53849b5e25fSSatish Balay   v     = a->a;
53949b5e25fSSatish Balay   xb = x;
54049b5e25fSSatish Balay 
54149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
54249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
54349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
54449b5e25fSSatish Balay     ib = aj + *ai;
545831a3094SHong Zhang     jmin = 0;
54698c9bda7SSatish Balay     nonzerorow += (n>0);
5477fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
54849b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
54949b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
55049b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
55149b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
55249b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
55349b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
55449b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
555831a3094SHong Zhang       v += 49; jmin++;
5567fbae186SHong Zhang     }
557444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
558444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
559831a3094SHong Zhang     for (j=jmin; j<n; j++) {
56049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
56149b5e25fSSatish Balay       cval       = ib[j]*7;
56249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
56349b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
56449b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
56549b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
56649b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
56749b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
56849b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
56949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
57049b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
57149b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
57249b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
57349b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
57449b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
57549b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
57649b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
57749b5e25fSSatish Balay       v  += 49;
57849b5e25fSSatish Balay     }
57949b5e25fSSatish Balay     xb +=7; ai++;
58049b5e25fSSatish Balay   }
5811ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5821ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
583dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
58449b5e25fSSatish Balay   PetscFunctionReturn(0);
58549b5e25fSSatish Balay }
58649b5e25fSSatish Balay 
58749b5e25fSSatish Balay /*
58849b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
58949b5e25fSSatish Balay */
5904a2ae208SSatish Balay #undef __FUNCT__
5914a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
592dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
59349b5e25fSSatish Balay {
59449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
59587828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5960b60a74dSHong Zhang   MatScalar      *v;
597dfbe8321SBarry Smith   PetscErrorCode ierr;
598d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
59998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
60049b5e25fSSatish Balay 
60149b5e25fSSatish Balay   PetscFunctionBegin;
6022dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6031ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6041ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
60549b5e25fSSatish Balay 
60649b5e25fSSatish Balay   aj   = a->j;
60749b5e25fSSatish Balay   v    = a->a;
60849b5e25fSSatish Balay   ii   = a->i;
60949b5e25fSSatish Balay 
61049b5e25fSSatish Balay   if (!a->mult_work) {
611d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
61249b5e25fSSatish Balay   }
61349b5e25fSSatish Balay   work = a->mult_work;
61449b5e25fSSatish Balay 
61549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
61649b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
61749b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
61898c9bda7SSatish Balay     nonzerorow += (n>0);
61949b5e25fSSatish Balay 
62049b5e25fSSatish Balay     /* upper triangular part */
62149b5e25fSSatish Balay     for (j=0; j<n; j++) {
62249b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
62349b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
62449b5e25fSSatish Balay       workt += bs;
62549b5e25fSSatish Balay     }
62649b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
62749b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
62849b5e25fSSatish Balay 
62949b5e25fSSatish Balay     /* strict lower triangular part */
630831a3094SHong Zhang     idx = aj+ii[0];
631831a3094SHong Zhang     if (*idx == i){
63296b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
633831a3094SHong Zhang     }
63496b9376eSHong Zhang 
63549b5e25fSSatish Balay     if (ncols > 0){
63649b5e25fSSatish Balay       workt = work;
63787828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
638831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
639831a3094SHong Zhang       for (j=0; j<n; j++) {
640831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
64149b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
64249b5e25fSSatish Balay         workt += bs;
64349b5e25fSSatish Balay       }
64449b5e25fSSatish Balay     }
64549b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
64649b5e25fSSatish Balay   }
64749b5e25fSSatish Balay 
6481ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6491ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
650dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
65149b5e25fSSatish Balay   PetscFunctionReturn(0);
65249b5e25fSSatish Balay }
65349b5e25fSSatish Balay 
6544a2ae208SSatish Balay #undef __FUNCT__
6554a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
656dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
65749b5e25fSSatish Balay {
65849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
659bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
66049b5e25fSSatish Balay   MatScalar      *v;
6616849ba73SBarry Smith   PetscErrorCode ierr;
66213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
66398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
66449b5e25fSSatish Balay 
66549b5e25fSSatish Balay   PetscFunctionBegin;
666343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
6671ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6681ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
66949b5e25fSSatish Balay   v  = a->a;
67049b5e25fSSatish Balay   xb = x;
67149b5e25fSSatish Balay 
67249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
67349b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
67449b5e25fSSatish Balay     x1 = xb[0];
67549b5e25fSSatish Balay     ib = aj + *ai;
676831a3094SHong Zhang     jmin = 0;
67798c9bda7SSatish Balay     nonzerorow += (n>0);
678831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
679831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
680831a3094SHong Zhang     }
681831a3094SHong Zhang     for (j=jmin; j<n; j++) {
68249b5e25fSSatish Balay       cval    = *ib;
68349b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
68449b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
68549b5e25fSSatish Balay     }
68649b5e25fSSatish Balay     xb++; ai++;
68749b5e25fSSatish Balay   }
68849b5e25fSSatish Balay 
6891ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
690bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
69149b5e25fSSatish Balay 
692dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
69349b5e25fSSatish Balay   PetscFunctionReturn(0);
69449b5e25fSSatish Balay }
69549b5e25fSSatish Balay 
6964a2ae208SSatish Balay #undef __FUNCT__
6974a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
698dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
69949b5e25fSSatish Balay {
70049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
701bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
70249b5e25fSSatish Balay   MatScalar      *v;
7036849ba73SBarry Smith   PetscErrorCode ierr;
70413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
70598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
70649b5e25fSSatish Balay 
70749b5e25fSSatish Balay   PetscFunctionBegin;
708343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7091ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7101ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
71149b5e25fSSatish Balay 
71249b5e25fSSatish Balay   v  = a->a;
71349b5e25fSSatish Balay   xb = x;
71449b5e25fSSatish Balay 
71549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
71649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
71749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
71849b5e25fSSatish Balay     ib = aj + *ai;
719831a3094SHong Zhang     jmin = 0;
72098c9bda7SSatish Balay     nonzerorow += (n>0);
7217fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
72249b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
72349b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
724831a3094SHong Zhang       v += 4; jmin++;
7257fbae186SHong Zhang     }
726444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
727444d8c10SJed Brown     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
728831a3094SHong Zhang     for (j=jmin; j<n; j++) {
72949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
73049b5e25fSSatish Balay       cval       = ib[j]*2;
73149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
73249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
73349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
73449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
73549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
73649b5e25fSSatish Balay       v  += 4;
73749b5e25fSSatish Balay     }
73849b5e25fSSatish Balay     xb +=2; ai++;
73949b5e25fSSatish Balay   }
7401ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
741bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
74249b5e25fSSatish Balay 
743dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
74449b5e25fSSatish Balay   PetscFunctionReturn(0);
74549b5e25fSSatish Balay }
74649b5e25fSSatish Balay 
7474a2ae208SSatish Balay #undef __FUNCT__
7484a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
749dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
75049b5e25fSSatish Balay {
75149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
752bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
75349b5e25fSSatish Balay   MatScalar      *v;
7546849ba73SBarry Smith   PetscErrorCode ierr;
75513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
75698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
75749b5e25fSSatish Balay 
75849b5e25fSSatish Balay   PetscFunctionBegin;
759343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
7601ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7611ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
76249b5e25fSSatish Balay 
76349b5e25fSSatish Balay   v     = a->a;
76449b5e25fSSatish Balay   xb = x;
76549b5e25fSSatish Balay 
76649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
76749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
76849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
76949b5e25fSSatish Balay     ib = aj + *ai;
770831a3094SHong Zhang     jmin = 0;
77198c9bda7SSatish Balay     nonzerorow += (n>0);
7727fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
77349b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
77449b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
77549b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
776831a3094SHong Zhang      v += 9; jmin++;
7777fbae186SHong Zhang     }
778444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
779444d8c10SJed Brown     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
780831a3094SHong Zhang     for (j=jmin; j<n; j++) {
78149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
78249b5e25fSSatish Balay       cval       = ib[j]*3;
78349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
78449b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
78549b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
78649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
78749b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
78849b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
78949b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
79049b5e25fSSatish Balay       v  += 9;
79149b5e25fSSatish Balay     }
79249b5e25fSSatish Balay     xb +=3; ai++;
79349b5e25fSSatish Balay   }
79449b5e25fSSatish Balay 
7951ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
796bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
79749b5e25fSSatish Balay 
798dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
79949b5e25fSSatish Balay   PetscFunctionReturn(0);
80049b5e25fSSatish Balay }
80149b5e25fSSatish Balay 
8024a2ae208SSatish Balay #undef __FUNCT__
8034a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
804dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
80549b5e25fSSatish Balay {
80649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
807bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
80849b5e25fSSatish Balay   MatScalar      *v;
8096849ba73SBarry Smith   PetscErrorCode ierr;
81013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
81198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
81249b5e25fSSatish Balay 
81349b5e25fSSatish Balay   PetscFunctionBegin;
814343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8151ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8161ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
81749b5e25fSSatish Balay 
81849b5e25fSSatish Balay   v     = a->a;
81949b5e25fSSatish Balay   xb = x;
82049b5e25fSSatish Balay 
82149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
82249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
82349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
82449b5e25fSSatish Balay     ib = aj + *ai;
825831a3094SHong Zhang     jmin = 0;
82698c9bda7SSatish Balay     nonzerorow += (n>0);
8277fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
82849b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
82949b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
83049b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
83149b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
832831a3094SHong Zhang       v += 16; jmin++;
8337fbae186SHong Zhang     }
834444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
835444d8c10SJed Brown     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
836831a3094SHong Zhang     for (j=jmin; j<n; j++) {
83749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
83849b5e25fSSatish Balay       cval       = ib[j]*4;
83949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
84049b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
84149b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
84249b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
84349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
84449b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
84549b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
84649b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
84749b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
84849b5e25fSSatish Balay       v  += 16;
84949b5e25fSSatish Balay     }
85049b5e25fSSatish Balay     xb +=4; ai++;
85149b5e25fSSatish Balay   }
85249b5e25fSSatish Balay 
8531ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
854bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
85549b5e25fSSatish Balay 
856dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
85749b5e25fSSatish Balay   PetscFunctionReturn(0);
85849b5e25fSSatish Balay }
85949b5e25fSSatish Balay 
8604a2ae208SSatish Balay #undef __FUNCT__
8614a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
862dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
86349b5e25fSSatish Balay {
86449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
865bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
86649b5e25fSSatish Balay   MatScalar      *v;
8676849ba73SBarry Smith   PetscErrorCode ierr;
86813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
86998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
87049b5e25fSSatish Balay 
87149b5e25fSSatish Balay   PetscFunctionBegin;
872343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
8731ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8741ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
87549b5e25fSSatish Balay 
87649b5e25fSSatish Balay   v     = a->a;
87749b5e25fSSatish Balay   xb = x;
87849b5e25fSSatish Balay 
87949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
88049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
88149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
88249b5e25fSSatish Balay     ib = aj + *ai;
883831a3094SHong Zhang     jmin = 0;
88498c9bda7SSatish Balay     nonzerorow += (n>0);
8857fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
88649b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
88749b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
88849b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
88949b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
89049b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
891831a3094SHong Zhang       v += 25; jmin++;
8927fbae186SHong Zhang     }
893444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
894444d8c10SJed Brown     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
895831a3094SHong Zhang     for (j=jmin; j<n; j++) {
89649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
89749b5e25fSSatish Balay       cval       = ib[j]*5;
89849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
89949b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
90049b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
90149b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
90249b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
90349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
90449b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
90549b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
90649b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
90749b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
90849b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
90949b5e25fSSatish Balay       v  += 25;
91049b5e25fSSatish Balay     }
91149b5e25fSSatish Balay     xb +=5; ai++;
91249b5e25fSSatish Balay   }
91349b5e25fSSatish Balay 
9141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
915bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
91649b5e25fSSatish Balay 
917dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
91849b5e25fSSatish Balay   PetscFunctionReturn(0);
91949b5e25fSSatish Balay }
9204a2ae208SSatish Balay #undef __FUNCT__
9214a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
922dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
92349b5e25fSSatish Balay {
92449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
925bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
92649b5e25fSSatish Balay   MatScalar      *v;
9276849ba73SBarry Smith   PetscErrorCode ierr;
92813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
92998c9bda7SSatish Balay   PetscInt       nonzerorow=0;
93049b5e25fSSatish Balay 
93149b5e25fSSatish Balay   PetscFunctionBegin;
932343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9331ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
93549b5e25fSSatish Balay 
93649b5e25fSSatish Balay   v     = a->a;
93749b5e25fSSatish Balay   xb = x;
93849b5e25fSSatish Balay 
93949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
94049b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
94149b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
94249b5e25fSSatish Balay     ib = aj + *ai;
943831a3094SHong Zhang     jmin = 0;
94498c9bda7SSatish Balay     nonzerorow += (n>0);
9457fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
94649b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
94749b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
94849b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
94949b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
95049b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
95149b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
952831a3094SHong Zhang       v += 36; jmin++;
9537fbae186SHong Zhang     }
954444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
955444d8c10SJed Brown     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
956831a3094SHong Zhang     for (j=jmin; j<n; j++) {
95749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
95849b5e25fSSatish Balay       cval       = ib[j]*6;
95949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
96049b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
96149b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
96249b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
96349b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
96449b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
96549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96649b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
96749b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
96849b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
96949b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
97049b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
97149b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
97249b5e25fSSatish Balay       v  += 36;
97349b5e25fSSatish Balay     }
97449b5e25fSSatish Balay     xb +=6; ai++;
97549b5e25fSSatish Balay   }
97649b5e25fSSatish Balay 
9771ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
978bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
97949b5e25fSSatish Balay 
980dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
98149b5e25fSSatish Balay   PetscFunctionReturn(0);
98249b5e25fSSatish Balay }
98349b5e25fSSatish Balay 
9844a2ae208SSatish Balay #undef __FUNCT__
9854a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
986dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
98749b5e25fSSatish Balay {
98849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
989bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
99049b5e25fSSatish Balay   MatScalar      *v;
9916849ba73SBarry Smith   PetscErrorCode ierr;
99213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
99398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
99449b5e25fSSatish Balay 
99549b5e25fSSatish Balay   PetscFunctionBegin;
996343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
9971ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
99949b5e25fSSatish Balay 
100049b5e25fSSatish Balay   v     = a->a;
100149b5e25fSSatish Balay   xb = x;
100249b5e25fSSatish Balay 
100349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
100449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
100549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
100649b5e25fSSatish Balay     ib = aj + *ai;
1007831a3094SHong Zhang     jmin = 0;
100898c9bda7SSatish Balay     nonzerorow += (n>0);
10097fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
101049b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
101149b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
101249b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
101349b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
101449b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
101549b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
101649b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1017831a3094SHong Zhang       v += 49; jmin++;
10187fbae186SHong Zhang     }
1019444d8c10SJed Brown     PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1020444d8c10SJed Brown     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1021831a3094SHong Zhang     for (j=jmin; j<n; j++) {
102249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
102349b5e25fSSatish Balay       cval       = ib[j]*7;
102449b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
102549b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
102649b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
102749b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
102849b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
102949b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
103049b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
103149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
103249b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
103349b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
103449b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
103549b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
103649b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
103749b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
103849b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
103949b5e25fSSatish Balay       v  += 49;
104049b5e25fSSatish Balay     }
104149b5e25fSSatish Balay     xb +=7; ai++;
104249b5e25fSSatish Balay   }
104349b5e25fSSatish Balay 
10441ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1045bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
104649b5e25fSSatish Balay 
1047dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
104849b5e25fSSatish Balay   PetscFunctionReturn(0);
104949b5e25fSSatish Balay }
105049b5e25fSSatish Balay 
10514a2ae208SSatish Balay #undef __FUNCT__
10524a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1053dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
105449b5e25fSSatish Balay {
105549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1056bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1057066653e3SSatish Balay   MatScalar      *v;
1058dfbe8321SBarry Smith   PetscErrorCode ierr;
1059d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
106098c9bda7SSatish Balay   PetscInt       nonzerorow=0;
106149b5e25fSSatish Balay 
106249b5e25fSSatish Balay   PetscFunctionBegin;
1063343551c4SBarry Smith   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
10641ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10651ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
106649b5e25fSSatish Balay 
106749b5e25fSSatish Balay   aj   = a->j;
106849b5e25fSSatish Balay   v    = a->a;
106949b5e25fSSatish Balay   ii   = a->i;
107049b5e25fSSatish Balay 
107149b5e25fSSatish Balay   if (!a->mult_work) {
1072d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
107349b5e25fSSatish Balay   }
107449b5e25fSSatish Balay   work = a->mult_work;
107549b5e25fSSatish Balay 
107649b5e25fSSatish Balay 
107749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
107849b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
107949b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
108098c9bda7SSatish Balay     nonzerorow += (n>0);
108149b5e25fSSatish Balay 
108249b5e25fSSatish Balay     /* upper triangular part */
108349b5e25fSSatish Balay     for (j=0; j<n; j++) {
108449b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
108549b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
108649b5e25fSSatish Balay       workt += bs;
108749b5e25fSSatish Balay     }
108849b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
108949b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
109049b5e25fSSatish Balay 
109149b5e25fSSatish Balay     /* strict lower triangular part */
1092831a3094SHong Zhang     idx = aj+ii[0];
1093831a3094SHong Zhang     if (*idx == i){
109496b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1095831a3094SHong Zhang     }
109649b5e25fSSatish Balay     if (ncols > 0){
109749b5e25fSSatish Balay       workt = work;
109887828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1099831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1100831a3094SHong Zhang       for (j=0; j<n; j++) {
1101831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
110249b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
110349b5e25fSSatish Balay         workt += bs;
110449b5e25fSSatish Balay       }
110549b5e25fSSatish Balay     }
110649b5e25fSSatish Balay 
110749b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
110849b5e25fSSatish Balay   }
110949b5e25fSSatish Balay 
11101ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1111bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
111249b5e25fSSatish Balay 
1113dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
111449b5e25fSSatish Balay   PetscFunctionReturn(0);
111549b5e25fSSatish Balay }
111649b5e25fSSatish Balay 
11174a2ae208SSatish Balay #undef __FUNCT__
11184a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1119f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
112049b5e25fSSatish Balay {
112149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1122f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1123efee365bSSatish Balay   PetscErrorCode ierr;
11240805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
112549b5e25fSSatish Balay 
112649b5e25fSSatish Balay   PetscFunctionBegin;
1127f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1128efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
112949b5e25fSSatish Balay   PetscFunctionReturn(0);
113049b5e25fSSatish Balay }
113149b5e25fSSatish Balay 
11324a2ae208SSatish Balay #undef __FUNCT__
11334a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1134dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
113549b5e25fSSatish Balay {
113649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
113749b5e25fSSatish Balay   MatScalar      *v = a->a;
113849b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1139d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11406849ba73SBarry Smith   PetscErrorCode ierr;
114113f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
114249b5e25fSSatish Balay 
114349b5e25fSSatish Balay   PetscFunctionBegin;
114449b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
114549b5e25fSSatish Balay     for (k=0; k<mbs; k++){
114649b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1147831a3094SHong Zhang       col  = aj + jmin;
1148831a3094SHong Zhang       if (*col == k){         /* diagonal block */
114949b5e25fSSatish Balay         for (i=0; i<bs2; i++){
115049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
115149b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
115249b5e25fSSatish Balay #else
115349b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
115449b5e25fSSatish Balay #endif
115549b5e25fSSatish Balay         }
1156831a3094SHong Zhang         jmin++;
1157831a3094SHong Zhang       }
1158831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
115949b5e25fSSatish Balay         for (i=0; i<bs2; i++){
116049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
116149b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
116249b5e25fSSatish Balay #else
116349b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
116449b5e25fSSatish Balay #endif
116549b5e25fSSatish Balay         }
116649b5e25fSSatish Balay       }
116749b5e25fSSatish Balay     }
116849b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11690b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
117074ed9c26SBarry Smith     ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
11710b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11720b8dc8d2SHong Zhang     il[0] = 0;
117349b5e25fSSatish Balay 
117449b5e25fSSatish Balay     *norm = 0.0;
117549b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
117649b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
117749b5e25fSSatish Balay       /*-- col sum --*/
117849b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1179831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1180831a3094SHong Zhang                   at step k */
118149b5e25fSSatish Balay       while (i<mbs){
118249b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
118349b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
118449b5e25fSSatish Balay         for (j=0; j<bs; j++){
118549b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
118649b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
118749b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
118849b5e25fSSatish Balay           }
118949b5e25fSSatish Balay         }
119049b5e25fSSatish Balay         /* update il, jl */
1191831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1192831a3094SHong Zhang         jmax = a->i[i+1];
119349b5e25fSSatish Balay         if (jmin < jmax){
119449b5e25fSSatish Balay           il[i] = jmin;
119549b5e25fSSatish Balay           j   = a->j[jmin];
119649b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
119749b5e25fSSatish Balay         }
119849b5e25fSSatish Balay         i = nexti;
119949b5e25fSSatish Balay       }
120049b5e25fSSatish Balay       /*-- row sum --*/
120149b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
120249b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
120349b5e25fSSatish Balay         for (j=0; j<bs; j++){
120449b5e25fSSatish Balay           v = a->a + i*bs2 + j;
120549b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
12060b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
120749b5e25fSSatish Balay           }
120849b5e25fSSatish Balay         }
120949b5e25fSSatish Balay       }
121049b5e25fSSatish Balay       /* add k_th block row to il, jl */
1211831a3094SHong Zhang       col = aj+jmin;
1212831a3094SHong Zhang       if (*col == k) jmin++;
121349b5e25fSSatish Balay       if (jmin < jmax){
121449b5e25fSSatish Balay         il[k] = jmin;
12150b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
121649b5e25fSSatish Balay       }
121749b5e25fSSatish Balay       for (j=0; j<bs; j++){
121849b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
121949b5e25fSSatish Balay       }
122049b5e25fSSatish Balay     }
122174ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
122249b5e25fSSatish Balay   } else {
1223e32f2f54SBarry Smith     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
122449b5e25fSSatish Balay   }
122549b5e25fSSatish Balay   PetscFunctionReturn(0);
122649b5e25fSSatish Balay }
122749b5e25fSSatish Balay 
12284a2ae208SSatish Balay #undef __FUNCT__
12294a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1230ace3abfcSBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg)
123149b5e25fSSatish Balay {
123249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1233dfbe8321SBarry Smith   PetscErrorCode ierr;
123449b5e25fSSatish Balay 
123549b5e25fSSatish Balay   PetscFunctionBegin;
123649b5e25fSSatish Balay 
123749b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1238d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1239ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1240ef511fbeSHong Zhang     PetscFunctionReturn(0);
124149b5e25fSSatish Balay   }
124249b5e25fSSatish Balay 
124349b5e25fSSatish Balay   /* if the a->i are the same */
124413f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1245abc0a331SBarry Smith   if (!*flg) {
124649b5e25fSSatish Balay     PetscFunctionReturn(0);
124749b5e25fSSatish Balay   }
124849b5e25fSSatish Balay 
124949b5e25fSSatish Balay   /* if a->j are the same */
125013f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1251abc0a331SBarry Smith   if (!*flg) {
125249b5e25fSSatish Balay     PetscFunctionReturn(0);
125349b5e25fSSatish Balay   }
125449b5e25fSSatish Balay   /* if a->a are the same */
1255d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1256935af2e7SHong Zhang   PetscFunctionReturn(0);
125749b5e25fSSatish Balay }
125849b5e25fSSatish Balay 
12594a2ae208SSatish Balay #undef __FUNCT__
12604a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1261dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
126249b5e25fSSatish Balay {
126349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1264dfbe8321SBarry Smith   PetscErrorCode ierr;
1265*8a0c6efdSHong Zhang   PetscInt       i,j,k,row,bs,*ai,*aj,ambs,bs2;
126687828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
126749b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
126849b5e25fSSatish Balay 
126949b5e25fSSatish Balay   PetscFunctionBegin;
1270d0f46423SBarry Smith   bs   = A->rmap->bs;
1271e32f2f54SBarry Smith   if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
127282799104SHong Zhang 
127349b5e25fSSatish Balay   aa   = a->a;
1274*8a0c6efdSHong Zhang   ambs = a->mbs;
1275*8a0c6efdSHong Zhang 
1276*8a0c6efdSHong Zhang   if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC){
1277*8a0c6efdSHong Zhang     PetscInt *diag=a->diag;
1278*8a0c6efdSHong Zhang     aa   = a->a;
1279*8a0c6efdSHong Zhang     ambs = a->mbs;
1280*8a0c6efdSHong Zhang     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1281*8a0c6efdSHong Zhang     for (i=0; i<ambs; i++) x[i] = 1.0/aa[diag[i]];
1282*8a0c6efdSHong Zhang     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1283*8a0c6efdSHong Zhang     PetscFunctionReturn(0);
1284*8a0c6efdSHong Zhang   }
1285*8a0c6efdSHong Zhang 
128649b5e25fSSatish Balay   ai   = a->i;
128749b5e25fSSatish Balay   aj   = a->j;
128849b5e25fSSatish Balay   bs2  = a->bs2;
12892dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12901ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
129149b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
129249b5e25fSSatish Balay     j=ai[i];
129349b5e25fSSatish Balay     if (aj[j] == i) {    /* if this is a diagonal element */
129449b5e25fSSatish Balay       row  = i*bs;
129549b5e25fSSatish Balay       aa_j = aa + j*bs2;
129649b5e25fSSatish Balay       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
129749b5e25fSSatish Balay     }
129849b5e25fSSatish Balay   }
12991ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
130049b5e25fSSatish Balay   PetscFunctionReturn(0);
130149b5e25fSSatish Balay }
130249b5e25fSSatish Balay 
13034a2ae208SSatish Balay #undef __FUNCT__
13044a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1305dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
130649b5e25fSSatish Balay {
130749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
13085e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
130949b5e25fSSatish Balay   MatScalar      *aa,*v;
1310dfbe8321SBarry Smith   PetscErrorCode ierr;
13115e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1312ace3abfcSBarry Smith   PetscBool      flg;
131349b5e25fSSatish Balay 
131449b5e25fSSatish Balay   PetscFunctionBegin;
1315b3bf805bSHong Zhang   if (ll != rr){
1316b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1317e7e72b3dSBarry Smith     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1318b3bf805bSHong Zhang   }
1319b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
132049b5e25fSSatish Balay   ai  = a->i;
132149b5e25fSSatish Balay   aj  = a->j;
132249b5e25fSSatish Balay   aa  = a->a;
1323d0f46423SBarry Smith   m   = A->rmap->N;
1324d0f46423SBarry Smith   bs  = A->rmap->bs;
132549b5e25fSSatish Balay   mbs = a->mbs;
132649b5e25fSSatish Balay   bs2 = a->bs2;
132749b5e25fSSatish Balay 
13281ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
132949b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1330e32f2f54SBarry Smith   if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
133149b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
133249b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
133349b5e25fSSatish Balay     li = l + i*bs;
133449b5e25fSSatish Balay     v  = aa + bs2*ai[i];
133549b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
133649b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13375e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
133849b5e25fSSatish Balay         x = ri[k];
133949b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
134049b5e25fSSatish Balay       }
134149b5e25fSSatish Balay     }
134249b5e25fSSatish Balay   }
13431ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1344dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
134549b5e25fSSatish Balay   PetscFunctionReturn(0);
134649b5e25fSSatish Balay }
134749b5e25fSSatish Balay 
13484a2ae208SSatish Balay #undef __FUNCT__
13494a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1350dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
135149b5e25fSSatish Balay {
135249b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
135349b5e25fSSatish Balay 
135449b5e25fSSatish Balay   PetscFunctionBegin;
135549b5e25fSSatish Balay   info->block_size     = a->bs2;
1356ceed8ce5SJed Brown   info->nz_allocated   = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */
13576c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
135849b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
135949b5e25fSSatish Balay   info->assemblies   = A->num_ass;
13608e58a170SBarry Smith   info->mallocs      = A->info.mallocs;
13617adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
1362d5f3da31SBarry Smith   if (A->factortype) {
136349b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
136449b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
136549b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
136649b5e25fSSatish Balay   } else {
136749b5e25fSSatish Balay     info->fill_ratio_given  = 0;
136849b5e25fSSatish Balay     info->fill_ratio_needed = 0;
136949b5e25fSSatish Balay     info->factor_mallocs    = 0;
137049b5e25fSSatish Balay   }
137149b5e25fSSatish Balay   PetscFunctionReturn(0);
137249b5e25fSSatish Balay }
137349b5e25fSSatish Balay 
137449b5e25fSSatish Balay 
13754a2ae208SSatish Balay #undef __FUNCT__
13764a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1377dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
137849b5e25fSSatish Balay {
137949b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1380dfbe8321SBarry Smith   PetscErrorCode ierr;
138149b5e25fSSatish Balay 
138249b5e25fSSatish Balay   PetscFunctionBegin;
138349b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
138449b5e25fSSatish Balay   PetscFunctionReturn(0);
138549b5e25fSSatish Balay }
1386dc354874SHong Zhang 
13874a2ae208SSatish Balay #undef __FUNCT__
1388985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1389985db425SBarry Smith /*
1390985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1391985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1392985db425SBarry Smith */
1393985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1394dc354874SHong Zhang {
1395dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1396dfbe8321SBarry Smith   PetscErrorCode ierr;
139713f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1398c3fca9a7SHong Zhang   PetscReal      atmp;
1399273d9f13SBarry Smith   MatScalar      *aa;
1400985db425SBarry Smith   PetscScalar    *x;
140113f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1402dc354874SHong Zhang 
1403dc354874SHong Zhang   PetscFunctionBegin;
1404e32f2f54SBarry Smith   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1405e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1406d0f46423SBarry Smith   bs   = A->rmap->bs;
1407dc354874SHong Zhang   aa   = a->a;
1408dc354874SHong Zhang   ai   = a->i;
1409dc354874SHong Zhang   aj   = a->j;
141044117c81SHong Zhang   mbs = a->mbs;
1411dc354874SHong Zhang 
1412985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14131ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1414dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1415e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
141644117c81SHong Zhang   for (i=0; i<mbs; i++) {
1417d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1418d0f6400bSHong Zhang     brow  = bs*i;
141944117c81SHong Zhang     for (j=0; j<ncols; j++){
1420d0f6400bSHong Zhang       bcol = bs*(*aj);
142144117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1422d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
142344117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1424d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1425d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1426c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1427c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
142844117c81SHong Zhang         }
142944117c81SHong Zhang       }
1430d0f6400bSHong Zhang       aj++;
1431dc354874SHong Zhang     }
1432dc354874SHong Zhang   }
14331ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1434dc354874SHong Zhang   PetscFunctionReturn(0);
1435dc354874SHong Zhang }
1436