xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 74ed9c26ab880fdcd1d3cdbb5b1e39d1b833147d)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
37c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
4c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
549b5e25fSSatish Balay #include "petscbt.h"
67c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h"
749b5e25fSSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1013f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1149b5e25fSSatish Balay {
125eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
136849ba73SBarry Smith   PetscErrorCode ierr;
145d0c19d7SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
155d0c19d7SBarry Smith   const PetscInt *idx;
16521d7252SBarry Smith   PetscBT        table,table0;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19b3bf805bSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23d0f46423SBarry Smith   bs  = A->rmap->bs;
24d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26d0f46423SBarry Smith   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
27d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n ; ++j){
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
48d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
52d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
54efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end ; l++){
61d94109b8SHong Zhang             bcol = aj[l];
62d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
63d94109b8SHong Zhang           }
64d94109b8SHong Zhang           k++;
65d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67d94109b8SHong Zhang           for (l = start; l<end ; l++){
68d94109b8SHong Zhang             bcol = aj[l];
69dbe03f88SHong Zhang             if (bcol > bcol_max) break;
70d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
71d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
72d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
73d94109b8SHong Zhang             }
74d94109b8SHong Zhang           }
75d94109b8SHong Zhang         }
76d94109b8SHong Zhang       }
77d94109b8SHong Zhang     } /* for each overlap */
78d94109b8SHong Zhang 
79d94109b8SHong Zhang     /* expand the Index Set */
80d94109b8SHong Zhang     for (j=0; j<isz; j++) {
81d94109b8SHong Zhang       for (k=0; k<bs; k++)
82d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
83d94109b8SHong Zhang     }
84d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
85d94109b8SHong Zhang   }
86d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
87d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
905eee224dSHong Zhang   PetscFunctionReturn(0);
9149b5e25fSSatish Balay }
9249b5e25fSSatish Balay 
934a2ae208SSatish Balay #undef __FUNCT__
944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
954aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
9649b5e25fSSatish Balay {
9749b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
986849ba73SBarry Smith   PetscErrorCode ierr;
9913f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10013f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
1015d0c19d7SBarry Smith   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
1025d0c19d7SBarry Smith   const PetscInt *irow,*aj = a->j,*ai = a->i;
10349b5e25fSSatish Balay   MatScalar      *mat_a;
10449b5e25fSSatish Balay   Mat            C;
10514ca34e6SBarry Smith   PetscTruth     flag,sorted;
10649b5e25fSSatish Balay 
10749b5e25fSSatish Balay   PetscFunctionBegin;
108e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
10914ca34e6SBarry Smith   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
11014ca34e6SBarry Smith   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11149b5e25fSSatish Balay 
11249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11449b5e25fSSatish Balay 
115*74ed9c26SBarry Smith   ierr  = PetscMalloc(oldcols*sizeof(PetscInt),&smap);CHKERRQ(ierr);
116*74ed9c26SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11749b5e25fSSatish Balay   ssmap = smap;
11813f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11949b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12049b5e25fSSatish Balay   /* determine lens of each row */
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12249b5e25fSSatish Balay     kstart  = ai[irow[i]];
12349b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12449b5e25fSSatish Balay     lens[i] = 0;
12549b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12649b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12749b5e25fSSatish Balay           lens[i]++;
12849b5e25fSSatish Balay         }
12949b5e25fSSatish Balay       }
13049b5e25fSSatish Balay     }
13149b5e25fSSatish Balay   /* Create and fill new matrix */
13249b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13349b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13449b5e25fSSatish Balay 
135d0f46423SBarry Smith     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13613f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
137abc0a331SBarry Smith     if (!flag) {
138347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13949b5e25fSSatish Balay     }
14013f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14149b5e25fSSatish Balay     C = *B;
14249b5e25fSSatish Balay   } else {
1437adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
144f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1457adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
146ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14749b5e25fSSatish Balay   }
14849b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14949b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15049b5e25fSSatish Balay     row    = irow[i];
15149b5e25fSSatish Balay     kstart = ai[row];
15249b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15349b5e25fSSatish Balay     mat_i  = c->i[i];
15449b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15549b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15649b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15749b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15849b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15949b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16049b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16149b5e25fSSatish Balay         mat_a   += bs2;
16249b5e25fSSatish Balay         (*mat_ilen)++;
16349b5e25fSSatish Balay       }
16449b5e25fSSatish Balay     }
16549b5e25fSSatish Balay   }
16649b5e25fSSatish Balay 
16749b5e25fSSatish Balay   /* Free work space */
16849b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay 
17349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17449b5e25fSSatish Balay   *B = C;
17549b5e25fSSatish Balay   PetscFunctionReturn(0);
17649b5e25fSSatish Balay }
17749b5e25fSSatish Balay 
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
1804aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
18149b5e25fSSatish Balay {
18249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18349b5e25fSSatish Balay   IS             is1;
1846849ba73SBarry Smith   PetscErrorCode ierr;
1855d0c19d7SBarry Smith   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
1865d0c19d7SBarry Smith   const PetscInt *irow;
18749b5e25fSSatish Balay 
18849b5e25fSSatish Balay   PetscFunctionBegin;
189e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
19049b5e25fSSatish Balay 
19149b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19249b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19349b5e25fSSatish Balay 
19449b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19549b5e25fSSatish Balay    and form the IS with compressed IS */
196*74ed9c26SBarry Smith   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
197*74ed9c26SBarry Smith   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
19849b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19949b5e25fSSatish Balay 
20049b5e25fSSatish Balay   count = 0;
20149b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
202e005ede5SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20349b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20449b5e25fSSatish Balay   }
20549b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
206*74ed9c26SBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
207*74ed9c26SBarry Smith   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
20849b5e25fSSatish Balay 
2094aa3045dSJed Brown   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);CHKERRQ(ierr);
210*74ed9c26SBarry Smith   ierr = ISDestroy(is1);CHKERRQ(ierr);
21149b5e25fSSatish Balay   PetscFunctionReturn(0);
21249b5e25fSSatish Balay }
21349b5e25fSSatish Balay 
2144a2ae208SSatish Balay #undef __FUNCT__
2154a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21613f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21749b5e25fSSatish Balay {
2186849ba73SBarry Smith   PetscErrorCode ierr;
21913f74950SBarry Smith   PetscInt       i;
22049b5e25fSSatish Balay 
22149b5e25fSSatish Balay   PetscFunctionBegin;
22249b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22382502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22449b5e25fSSatish Balay   }
22549b5e25fSSatish Balay 
22649b5e25fSSatish Balay   for (i=0; i<n; i++) {
2274aa3045dSJed Brown     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
22849b5e25fSSatish Balay   }
22949b5e25fSSatish Balay   PetscFunctionReturn(0);
23049b5e25fSSatish Balay }
23149b5e25fSSatish Balay 
23249b5e25fSSatish Balay /* -------------------------------------------------------*/
23349b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23449b5e25fSSatish Balay /* -------------------------------------------------------*/
235d9eff348SSatish Balay #include "petscblaslapack.h"
23649b5e25fSSatish Balay 
23749b5e25fSSatish Balay 
2384a2ae208SSatish Balay #undef __FUNCT__
2394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
240dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
24149b5e25fSSatish Balay {
24249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
24449b5e25fSSatish Balay   MatScalar      *v;
2456849ba73SBarry Smith   PetscErrorCode ierr;
24613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
24849b5e25fSSatish Balay 
24949b5e25fSSatish Balay   PetscFunctionBegin;
2502dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2521ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25349b5e25fSSatish Balay 
25449b5e25fSSatish Balay   v     = a->a;
25549b5e25fSSatish Balay   xb = x;
25649b5e25fSSatish Balay 
25749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
25949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
26049b5e25fSSatish Balay     ib = aj + *ai;
261831a3094SHong Zhang     jmin = 0;
26298c9bda7SSatish Balay     nonzerorow += (n>0);
2637fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
26449b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
26549b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
266831a3094SHong Zhang       v += 4; jmin++;
2677fbae186SHong Zhang     }
268831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
27049b5e25fSSatish Balay       cval       = ib[j]*2;
27149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
27249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
27349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
27449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
27549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
27649b5e25fSSatish Balay       v  += 4;
27749b5e25fSSatish Balay     }
27849b5e25fSSatish Balay     xb +=2; ai++;
27949b5e25fSSatish Balay   }
28049b5e25fSSatish Balay 
2811ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2821ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
283dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
28449b5e25fSSatish Balay   PetscFunctionReturn(0);
28549b5e25fSSatish Balay }
28649b5e25fSSatish Balay 
2874a2ae208SSatish Balay #undef __FUNCT__
2884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
289dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
29049b5e25fSSatish Balay {
29149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
29287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
29349b5e25fSSatish Balay   MatScalar      *v;
2946849ba73SBarry Smith   PetscErrorCode ierr;
29513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
29698c9bda7SSatish Balay   PetscInt       nonzerorow=0;
29749b5e25fSSatish Balay 
29849b5e25fSSatish Balay   PetscFunctionBegin;
2992dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3001ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3011ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
30249b5e25fSSatish Balay 
30349b5e25fSSatish Balay   v    = a->a;
30449b5e25fSSatish Balay   xb   = x;
30549b5e25fSSatish Balay 
30649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
30749b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30849b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
30949b5e25fSSatish Balay     ib = aj + *ai;
310831a3094SHong Zhang     jmin = 0;
31198c9bda7SSatish Balay     nonzerorow += (n>0);
3127fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
31349b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
31449b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
31549b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
316831a3094SHong Zhang       v += 9; jmin++;
3177fbae186SHong Zhang     }
318831a3094SHong Zhang     for (j=jmin; j<n; j++) {
31949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
32049b5e25fSSatish Balay       cval       = ib[j]*3;
32149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
32249b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
32349b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
32449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
32549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
32649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
32749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
32849b5e25fSSatish Balay       v  += 9;
32949b5e25fSSatish Balay     }
33049b5e25fSSatish Balay     xb +=3; ai++;
33149b5e25fSSatish Balay   }
33249b5e25fSSatish Balay 
3331ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3341ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
335dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
33649b5e25fSSatish Balay   PetscFunctionReturn(0);
33749b5e25fSSatish Balay }
33849b5e25fSSatish Balay 
3394a2ae208SSatish Balay #undef __FUNCT__
3404a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
341dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
34249b5e25fSSatish Balay {
34349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
34487828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
34549b5e25fSSatish Balay   MatScalar      *v;
3466849ba73SBarry Smith   PetscErrorCode ierr;
34713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
34898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
34949b5e25fSSatish Balay 
35049b5e25fSSatish Balay   PetscFunctionBegin;
3512dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3521ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3531ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
35449b5e25fSSatish Balay 
35549b5e25fSSatish Balay   v     = a->a;
35649b5e25fSSatish Balay   xb = x;
35749b5e25fSSatish Balay 
35849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
35949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
36049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
36149b5e25fSSatish Balay     ib = aj + *ai;
362831a3094SHong Zhang     jmin = 0;
36398c9bda7SSatish Balay     nonzerorow += (n>0);
3647fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
36549b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
36649b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
36749b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
36849b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
369831a3094SHong Zhang       v += 16; jmin++;
3707fbae186SHong Zhang     }
371831a3094SHong Zhang     for (j=jmin; j<n; j++) {
37249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
37349b5e25fSSatish Balay       cval       = ib[j]*4;
37449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
37549b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
37649b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
37749b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
37849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
37949b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
38049b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
38149b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
38249b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
38349b5e25fSSatish Balay       v  += 16;
38449b5e25fSSatish Balay     }
38549b5e25fSSatish Balay     xb +=4; ai++;
38649b5e25fSSatish Balay   }
38749b5e25fSSatish Balay 
3881ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3891ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
390dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
39149b5e25fSSatish Balay   PetscFunctionReturn(0);
39249b5e25fSSatish Balay }
39349b5e25fSSatish Balay 
3944a2ae208SSatish Balay #undef __FUNCT__
3954a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
396dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
39749b5e25fSSatish Balay {
39849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
39987828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
40049b5e25fSSatish Balay   MatScalar      *v;
4016849ba73SBarry Smith   PetscErrorCode ierr;
40213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
40398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
40449b5e25fSSatish Balay 
40549b5e25fSSatish Balay   PetscFunctionBegin;
4062dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4071ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4081ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
40949b5e25fSSatish Balay 
41049b5e25fSSatish Balay   v     = a->a;
41149b5e25fSSatish Balay   xb = x;
41249b5e25fSSatish Balay 
41349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
41449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
41549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
41649b5e25fSSatish Balay     ib = aj + *ai;
417831a3094SHong Zhang     jmin = 0;
41898c9bda7SSatish Balay     nonzerorow += (n>0);
4197fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
42049b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
42149b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
42249b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
42349b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
42449b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
425831a3094SHong Zhang       v += 25; jmin++;
4267fbae186SHong Zhang     }
427831a3094SHong Zhang     for (j=jmin; j<n; j++) {
42849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
42949b5e25fSSatish Balay       cval       = ib[j]*5;
43049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
43149b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
43249b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
43349b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
43449b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
43549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
43649b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
43749b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
43849b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
43949b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
44049b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
44149b5e25fSSatish Balay       v  += 25;
44249b5e25fSSatish Balay     }
44349b5e25fSSatish Balay     xb +=5; ai++;
44449b5e25fSSatish Balay   }
44549b5e25fSSatish Balay 
4461ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4471ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
448dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
44949b5e25fSSatish Balay   PetscFunctionReturn(0);
45049b5e25fSSatish Balay }
45149b5e25fSSatish Balay 
45249b5e25fSSatish Balay 
4534a2ae208SSatish Balay #undef __FUNCT__
4544a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
455dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
45649b5e25fSSatish Balay {
45749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
45887828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
45949b5e25fSSatish Balay   MatScalar      *v;
4606849ba73SBarry Smith   PetscErrorCode ierr;
46113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
46298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
46349b5e25fSSatish Balay 
46449b5e25fSSatish Balay   PetscFunctionBegin;
4652dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4661ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4671ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
46849b5e25fSSatish Balay 
46949b5e25fSSatish Balay   v     = a->a;
47049b5e25fSSatish Balay   xb = x;
47149b5e25fSSatish Balay 
47249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
47349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
47449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
47549b5e25fSSatish Balay     ib = aj + *ai;
476831a3094SHong Zhang     jmin = 0;
47798c9bda7SSatish Balay     nonzerorow += (n>0);
4787fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
47949b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
48049b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
48149b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
48249b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
48349b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
48449b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
485831a3094SHong Zhang       v += 36; jmin++;
4867fbae186SHong Zhang     }
487831a3094SHong Zhang     for (j=jmin; j<n; j++) {
48849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
48949b5e25fSSatish Balay       cval       = ib[j]*6;
49049b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
49149b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
49249b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
49349b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
49449b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
49549b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
49649b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
49749b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
49849b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
49949b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
50049b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
50149b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
50249b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
50349b5e25fSSatish Balay       v  += 36;
50449b5e25fSSatish Balay     }
50549b5e25fSSatish Balay     xb +=6; ai++;
50649b5e25fSSatish Balay   }
50749b5e25fSSatish Balay 
5081ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5091ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
510dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
51149b5e25fSSatish Balay   PetscFunctionReturn(0);
51249b5e25fSSatish Balay }
5134a2ae208SSatish Balay #undef __FUNCT__
5144a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
515dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
51649b5e25fSSatish Balay {
51749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
51887828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
51949b5e25fSSatish Balay   MatScalar      *v;
5206849ba73SBarry Smith   PetscErrorCode ierr;
52113f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
52298c9bda7SSatish Balay   PetscInt       nonzerorow=0;
52349b5e25fSSatish Balay 
52449b5e25fSSatish Balay   PetscFunctionBegin;
5252dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5261ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5271ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
52849b5e25fSSatish Balay 
52949b5e25fSSatish Balay   v     = a->a;
53049b5e25fSSatish Balay   xb = x;
53149b5e25fSSatish Balay 
53249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
53349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
53449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
53549b5e25fSSatish Balay     ib = aj + *ai;
536831a3094SHong Zhang     jmin = 0;
53798c9bda7SSatish Balay     nonzerorow += (n>0);
5387fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
53949b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
54049b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
54149b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
54249b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
54349b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
54449b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
54549b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
546831a3094SHong Zhang       v += 49; jmin++;
5477fbae186SHong Zhang     }
548831a3094SHong Zhang     for (j=jmin; j<n; j++) {
54949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
55049b5e25fSSatish Balay       cval       = ib[j]*7;
55149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
55249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
55349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
55449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
55549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
55649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
55749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
55849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
55949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
56049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
56149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
56249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
56349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
56449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
56549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
56649b5e25fSSatish Balay       v  += 49;
56749b5e25fSSatish Balay     }
56849b5e25fSSatish Balay     xb +=7; ai++;
56949b5e25fSSatish Balay   }
5701ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5711ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
572dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr);
57349b5e25fSSatish Balay   PetscFunctionReturn(0);
57449b5e25fSSatish Balay }
57549b5e25fSSatish Balay 
57649b5e25fSSatish Balay /*
57749b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
57849b5e25fSSatish Balay */
5794a2ae208SSatish Balay #undef __FUNCT__
5804a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
581dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
58249b5e25fSSatish Balay {
58349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
58487828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
5850b60a74dSHong Zhang   MatScalar      *v;
586dfbe8321SBarry Smith   PetscErrorCode ierr;
587d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
58898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
58949b5e25fSSatish Balay 
59049b5e25fSSatish Balay   PetscFunctionBegin;
5912dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
5931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
59449b5e25fSSatish Balay 
59549b5e25fSSatish Balay   aj   = a->j;
59649b5e25fSSatish Balay   v    = a->a;
59749b5e25fSSatish Balay   ii   = a->i;
59849b5e25fSSatish Balay 
59949b5e25fSSatish Balay   if (!a->mult_work) {
600d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
60149b5e25fSSatish Balay   }
60249b5e25fSSatish Balay   work = a->mult_work;
60349b5e25fSSatish Balay 
60449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
60549b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
60649b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
60798c9bda7SSatish Balay     nonzerorow += (n>0);
60849b5e25fSSatish Balay 
60949b5e25fSSatish Balay     /* upper triangular part */
61049b5e25fSSatish Balay     for (j=0; j<n; j++) {
61149b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
61249b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
61349b5e25fSSatish Balay       workt += bs;
61449b5e25fSSatish Balay     }
61549b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
61649b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
61749b5e25fSSatish Balay 
61849b5e25fSSatish Balay     /* strict lower triangular part */
619831a3094SHong Zhang     idx = aj+ii[0];
620831a3094SHong Zhang     if (*idx == i){
62196b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
622831a3094SHong Zhang     }
62396b9376eSHong Zhang 
62449b5e25fSSatish Balay     if (ncols > 0){
62549b5e25fSSatish Balay       workt = work;
62687828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
627831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
628831a3094SHong Zhang       for (j=0; j<n; j++) {
629831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
63049b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
63149b5e25fSSatish Balay         workt += bs;
63249b5e25fSSatish Balay       }
63349b5e25fSSatish Balay     }
63449b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
63549b5e25fSSatish Balay   }
63649b5e25fSSatish Balay 
6371ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6381ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
639dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr);
64049b5e25fSSatish Balay   PetscFunctionReturn(0);
64149b5e25fSSatish Balay }
64249b5e25fSSatish Balay 
6436a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6444a2ae208SSatish Balay #undef __FUNCT__
6454a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
646dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
64749b5e25fSSatish Balay {
64849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
649bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
65049b5e25fSSatish Balay   MatScalar      *v;
6516849ba73SBarry Smith   PetscErrorCode ierr;
65213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
65398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
65449b5e25fSSatish Balay 
65549b5e25fSSatish Balay   PetscFunctionBegin;
6566a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
65949b5e25fSSatish Balay   v  = a->a;
66049b5e25fSSatish Balay   xb = x;
66149b5e25fSSatish Balay 
66249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
66349b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
66449b5e25fSSatish Balay     x1 = xb[0];
66549b5e25fSSatish Balay     ib = aj + *ai;
666831a3094SHong Zhang     jmin = 0;
66798c9bda7SSatish Balay     nonzerorow += (n>0);
668831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
669831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
670831a3094SHong Zhang     }
671831a3094SHong Zhang     for (j=jmin; j<n; j++) {
67249b5e25fSSatish Balay       cval    = *ib;
67349b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
67449b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
67549b5e25fSSatish Balay     }
67649b5e25fSSatish Balay     xb++; ai++;
67749b5e25fSSatish Balay   }
67849b5e25fSSatish Balay 
6791ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
680bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
68149b5e25fSSatish Balay 
682dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
68349b5e25fSSatish Balay   PetscFunctionReturn(0);
68449b5e25fSSatish Balay }
68549b5e25fSSatish Balay 
6864a2ae208SSatish Balay #undef __FUNCT__
6874a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
688dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
68949b5e25fSSatish Balay {
69049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
691bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
69249b5e25fSSatish Balay   MatScalar      *v;
6936849ba73SBarry Smith   PetscErrorCode ierr;
69413f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
69598c9bda7SSatish Balay   PetscInt       nonzerorow=0;
69649b5e25fSSatish Balay 
69749b5e25fSSatish Balay   PetscFunctionBegin;
6986a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6991ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
70149b5e25fSSatish Balay 
70249b5e25fSSatish Balay   v  = a->a;
70349b5e25fSSatish Balay   xb = x;
70449b5e25fSSatish Balay 
70549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
70649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
70749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
70849b5e25fSSatish Balay     ib = aj + *ai;
709831a3094SHong Zhang     jmin = 0;
71098c9bda7SSatish Balay     nonzerorow += (n>0);
7117fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
71249b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
71349b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
714831a3094SHong Zhang       v += 4; jmin++;
7157fbae186SHong Zhang     }
716831a3094SHong Zhang     for (j=jmin; j<n; j++) {
71749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
71849b5e25fSSatish Balay       cval       = ib[j]*2;
71949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
72049b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
72149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
72249b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
72349b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
72449b5e25fSSatish Balay       v  += 4;
72549b5e25fSSatish Balay     }
72649b5e25fSSatish Balay     xb +=2; ai++;
72749b5e25fSSatish Balay   }
7281ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
729bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
73049b5e25fSSatish Balay 
731dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
73249b5e25fSSatish Balay   PetscFunctionReturn(0);
73349b5e25fSSatish Balay }
73449b5e25fSSatish Balay 
7354a2ae208SSatish Balay #undef __FUNCT__
7364a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
737dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
73849b5e25fSSatish Balay {
73949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
740bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
74149b5e25fSSatish Balay   MatScalar      *v;
7426849ba73SBarry Smith   PetscErrorCode ierr;
74313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
74498c9bda7SSatish Balay   PetscInt       nonzerorow=0;
74549b5e25fSSatish Balay 
74649b5e25fSSatish Balay   PetscFunctionBegin;
7476a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7481ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7491ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
75049b5e25fSSatish Balay 
75149b5e25fSSatish Balay   v     = a->a;
75249b5e25fSSatish Balay   xb = x;
75349b5e25fSSatish Balay 
75449b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
75549b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
75649b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
75749b5e25fSSatish Balay     ib = aj + *ai;
758831a3094SHong Zhang     jmin = 0;
75998c9bda7SSatish Balay     nonzerorow += (n>0);
7607fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
76149b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
76249b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
76349b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
764831a3094SHong Zhang      v += 9; jmin++;
7657fbae186SHong Zhang     }
766831a3094SHong Zhang     for (j=jmin; j<n; j++) {
76749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
76849b5e25fSSatish Balay       cval       = ib[j]*3;
76949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
77049b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
77149b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
77249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
77349b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
77449b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
77549b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
77649b5e25fSSatish Balay       v  += 9;
77749b5e25fSSatish Balay     }
77849b5e25fSSatish Balay     xb +=3; ai++;
77949b5e25fSSatish Balay   }
78049b5e25fSSatish Balay 
7811ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
782bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
78349b5e25fSSatish Balay 
784dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
78549b5e25fSSatish Balay   PetscFunctionReturn(0);
78649b5e25fSSatish Balay }
78749b5e25fSSatish Balay 
7884a2ae208SSatish Balay #undef __FUNCT__
7894a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
790dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
79149b5e25fSSatish Balay {
79249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
793bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
79449b5e25fSSatish Balay   MatScalar      *v;
7956849ba73SBarry Smith   PetscErrorCode ierr;
79613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
79798c9bda7SSatish Balay   PetscInt       nonzerorow=0;
79849b5e25fSSatish Balay 
79949b5e25fSSatish Balay   PetscFunctionBegin;
8006a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8011ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8021ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
80349b5e25fSSatish Balay 
80449b5e25fSSatish Balay   v     = a->a;
80549b5e25fSSatish Balay   xb = x;
80649b5e25fSSatish Balay 
80749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
80849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
80949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
81049b5e25fSSatish Balay     ib = aj + *ai;
811831a3094SHong Zhang     jmin = 0;
81298c9bda7SSatish Balay     nonzerorow += (n>0);
8137fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
81449b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
81549b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
81649b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
81749b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
818831a3094SHong Zhang       v += 16; jmin++;
8197fbae186SHong Zhang     }
820831a3094SHong Zhang     for (j=jmin; j<n; j++) {
82149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
82249b5e25fSSatish Balay       cval       = ib[j]*4;
82349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
82449b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
82549b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
82649b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
82749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
82849b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
82949b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
83049b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
83149b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
83249b5e25fSSatish Balay       v  += 16;
83349b5e25fSSatish Balay     }
83449b5e25fSSatish Balay     xb +=4; ai++;
83549b5e25fSSatish Balay   }
83649b5e25fSSatish Balay 
8371ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
838bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
83949b5e25fSSatish Balay 
840dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
84149b5e25fSSatish Balay   PetscFunctionReturn(0);
84249b5e25fSSatish Balay }
84349b5e25fSSatish Balay 
8444a2ae208SSatish Balay #undef __FUNCT__
8454a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
846dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
84749b5e25fSSatish Balay {
84849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
849bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
85049b5e25fSSatish Balay   MatScalar      *v;
8516849ba73SBarry Smith   PetscErrorCode ierr;
85213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
85398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
85449b5e25fSSatish Balay 
85549b5e25fSSatish Balay   PetscFunctionBegin;
8566a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
85949b5e25fSSatish Balay 
86049b5e25fSSatish Balay   v     = a->a;
86149b5e25fSSatish Balay   xb = x;
86249b5e25fSSatish Balay 
86349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
86449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
86549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
86649b5e25fSSatish Balay     ib = aj + *ai;
867831a3094SHong Zhang     jmin = 0;
86898c9bda7SSatish Balay     nonzerorow += (n>0);
8697fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
87049b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
87149b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
87249b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
87349b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
87449b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
875831a3094SHong Zhang       v += 25; jmin++;
8767fbae186SHong Zhang     }
877831a3094SHong Zhang     for (j=jmin; j<n; j++) {
87849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
87949b5e25fSSatish Balay       cval       = ib[j]*5;
88049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
88149b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
88249b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
88349b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
88449b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
88549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
88649b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
88749b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
88849b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
88949b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
89049b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
89149b5e25fSSatish Balay       v  += 25;
89249b5e25fSSatish Balay     }
89349b5e25fSSatish Balay     xb +=5; ai++;
89449b5e25fSSatish Balay   }
89549b5e25fSSatish Balay 
8961ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
897bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
89849b5e25fSSatish Balay 
899dc0b31edSSatish Balay   ierr = PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
90049b5e25fSSatish Balay   PetscFunctionReturn(0);
90149b5e25fSSatish Balay }
9024a2ae208SSatish Balay #undef __FUNCT__
9034a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
904dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
90549b5e25fSSatish Balay {
90649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
907bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
90849b5e25fSSatish Balay   MatScalar      *v;
9096849ba73SBarry Smith   PetscErrorCode ierr;
91013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
91198c9bda7SSatish Balay   PetscInt       nonzerorow=0;
91249b5e25fSSatish Balay 
91349b5e25fSSatish Balay   PetscFunctionBegin;
9146a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9151ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9161ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
91749b5e25fSSatish Balay 
91849b5e25fSSatish Balay   v     = a->a;
91949b5e25fSSatish Balay   xb = x;
92049b5e25fSSatish Balay 
92149b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
92249b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
92349b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
92449b5e25fSSatish Balay     ib = aj + *ai;
925831a3094SHong Zhang     jmin = 0;
92698c9bda7SSatish Balay     nonzerorow += (n>0);
9277fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
92849b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
92949b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
93049b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
93149b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
93249b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
93349b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
934831a3094SHong Zhang       v += 36; jmin++;
9357fbae186SHong Zhang     }
936831a3094SHong Zhang     for (j=jmin; j<n; j++) {
93749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
93849b5e25fSSatish Balay       cval       = ib[j]*6;
93949b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
94049b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
94149b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
94249b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
94349b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
94449b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
94549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
94649b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
94749b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
94849b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
94949b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
95049b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
95149b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
95249b5e25fSSatish Balay       v  += 36;
95349b5e25fSSatish Balay     }
95449b5e25fSSatish Balay     xb +=6; ai++;
95549b5e25fSSatish Balay   }
95649b5e25fSSatish Balay 
9571ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
95949b5e25fSSatish Balay 
960dc0b31edSSatish Balay   ierr = PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
96149b5e25fSSatish Balay   PetscFunctionReturn(0);
96249b5e25fSSatish Balay }
96349b5e25fSSatish Balay 
9644a2ae208SSatish Balay #undef __FUNCT__
9654a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
966dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
96749b5e25fSSatish Balay {
96849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
969bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
97049b5e25fSSatish Balay   MatScalar      *v;
9716849ba73SBarry Smith   PetscErrorCode ierr;
97213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
97398c9bda7SSatish Balay   PetscInt       nonzerorow=0;
97449b5e25fSSatish Balay 
97549b5e25fSSatish Balay   PetscFunctionBegin;
9766a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9771ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9781ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
97949b5e25fSSatish Balay 
98049b5e25fSSatish Balay   v     = a->a;
98149b5e25fSSatish Balay   xb = x;
98249b5e25fSSatish Balay 
98349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
98449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
98549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
98649b5e25fSSatish Balay     ib = aj + *ai;
987831a3094SHong Zhang     jmin = 0;
98898c9bda7SSatish Balay     nonzerorow += (n>0);
9897fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
99049b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
99149b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
99249b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
99349b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
99449b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
99549b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
99649b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
997831a3094SHong Zhang       v += 49; jmin++;
9987fbae186SHong Zhang     }
999831a3094SHong Zhang     for (j=jmin; j<n; j++) {
100049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
100149b5e25fSSatish Balay       cval       = ib[j]*7;
100249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
100349b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
100449b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
100549b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
100649b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
100749b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
100849b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
100949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
101049b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
101149b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
101249b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
101349b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
101449b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
101549b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
101649b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
101749b5e25fSSatish Balay       v  += 49;
101849b5e25fSSatish Balay     }
101949b5e25fSSatish Balay     xb +=7; ai++;
102049b5e25fSSatish Balay   }
102149b5e25fSSatish Balay 
10221ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1023bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
102449b5e25fSSatish Balay 
1025dc0b31edSSatish Balay   ierr = PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
102649b5e25fSSatish Balay   PetscFunctionReturn(0);
102749b5e25fSSatish Balay }
102849b5e25fSSatish Balay 
10294a2ae208SSatish Balay #undef __FUNCT__
10304a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1031dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
103249b5e25fSSatish Balay {
103349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1034bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1035066653e3SSatish Balay   MatScalar      *v;
1036dfbe8321SBarry Smith   PetscErrorCode ierr;
1037d0f46423SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
103898c9bda7SSatish Balay   PetscInt       nonzerorow=0;
103949b5e25fSSatish Balay 
104049b5e25fSSatish Balay   PetscFunctionBegin;
10416a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10421ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
104449b5e25fSSatish Balay 
104549b5e25fSSatish Balay   aj   = a->j;
104649b5e25fSSatish Balay   v    = a->a;
104749b5e25fSSatish Balay   ii   = a->i;
104849b5e25fSSatish Balay 
104949b5e25fSSatish Balay   if (!a->mult_work) {
1050d0f46423SBarry Smith     ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
105149b5e25fSSatish Balay   }
105249b5e25fSSatish Balay   work = a->mult_work;
105349b5e25fSSatish Balay 
105449b5e25fSSatish Balay 
105549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
105649b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
105749b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
105898c9bda7SSatish Balay     nonzerorow += (n>0);
105949b5e25fSSatish Balay 
106049b5e25fSSatish Balay     /* upper triangular part */
106149b5e25fSSatish Balay     for (j=0; j<n; j++) {
106249b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
106349b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
106449b5e25fSSatish Balay       workt += bs;
106549b5e25fSSatish Balay     }
106649b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
106749b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
106849b5e25fSSatish Balay 
106949b5e25fSSatish Balay     /* strict lower triangular part */
1070831a3094SHong Zhang     idx = aj+ii[0];
1071831a3094SHong Zhang     if (*idx == i){
107296b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1073831a3094SHong Zhang     }
107449b5e25fSSatish Balay     if (ncols > 0){
107549b5e25fSSatish Balay       workt = work;
107687828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1077831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1078831a3094SHong Zhang       for (j=0; j<n; j++) {
1079831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
108049b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
108149b5e25fSSatish Balay         workt += bs;
108249b5e25fSSatish Balay       }
108349b5e25fSSatish Balay     }
108449b5e25fSSatish Balay 
108549b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
108649b5e25fSSatish Balay   }
108749b5e25fSSatish Balay 
10881ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1089bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
109049b5e25fSSatish Balay 
1091dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));CHKERRQ(ierr);
109249b5e25fSSatish Balay   PetscFunctionReturn(0);
109349b5e25fSSatish Balay }
109449b5e25fSSatish Balay 
10954a2ae208SSatish Balay #undef __FUNCT__
10964a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1097f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
109849b5e25fSSatish Balay {
109949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1100f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
1101efee365bSSatish Balay   PetscErrorCode ierr;
11020805154bSBarry Smith   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);
110349b5e25fSSatish Balay 
110449b5e25fSSatish Balay   PetscFunctionBegin;
1105f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1106efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
110749b5e25fSSatish Balay   PetscFunctionReturn(0);
110849b5e25fSSatish Balay }
110949b5e25fSSatish Balay 
11104a2ae208SSatish Balay #undef __FUNCT__
11114a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1112dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
111349b5e25fSSatish Balay {
111449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111549b5e25fSSatish Balay   MatScalar      *v = a->a;
111649b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1117d0f46423SBarry Smith   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11186849ba73SBarry Smith   PetscErrorCode ierr;
111913f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
112049b5e25fSSatish Balay 
112149b5e25fSSatish Balay   PetscFunctionBegin;
112249b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
112349b5e25fSSatish Balay     for (k=0; k<mbs; k++){
112449b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1125831a3094SHong Zhang       col  = aj + jmin;
1126831a3094SHong Zhang       if (*col == k){         /* diagonal block */
112749b5e25fSSatish Balay         for (i=0; i<bs2; i++){
112849b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
112949b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
113049b5e25fSSatish Balay #else
113149b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
113249b5e25fSSatish Balay #endif
113349b5e25fSSatish Balay         }
1134831a3094SHong Zhang         jmin++;
1135831a3094SHong Zhang       }
1136831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
113749b5e25fSSatish Balay         for (i=0; i<bs2; i++){
113849b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
113949b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
114049b5e25fSSatish Balay #else
114149b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
114249b5e25fSSatish Balay #endif
114349b5e25fSSatish Balay         }
114449b5e25fSSatish Balay       }
114549b5e25fSSatish Balay     }
114649b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11470b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1148*74ed9c26SBarry Smith     ierr = PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
11490b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11500b8dc8d2SHong Zhang     il[0] = 0;
115149b5e25fSSatish Balay 
115249b5e25fSSatish Balay     *norm = 0.0;
115349b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
115449b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
115549b5e25fSSatish Balay       /*-- col sum --*/
115649b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1157831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1158831a3094SHong Zhang                   at step k */
115949b5e25fSSatish Balay       while (i<mbs){
116049b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
116149b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
116249b5e25fSSatish Balay         for (j=0; j<bs; j++){
116349b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
116449b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
116549b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
116649b5e25fSSatish Balay           }
116749b5e25fSSatish Balay         }
116849b5e25fSSatish Balay         /* update il, jl */
1169831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1170831a3094SHong Zhang         jmax = a->i[i+1];
117149b5e25fSSatish Balay         if (jmin < jmax){
117249b5e25fSSatish Balay           il[i] = jmin;
117349b5e25fSSatish Balay           j   = a->j[jmin];
117449b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
117549b5e25fSSatish Balay         }
117649b5e25fSSatish Balay         i = nexti;
117749b5e25fSSatish Balay       }
117849b5e25fSSatish Balay       /*-- row sum --*/
117949b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
118049b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
118149b5e25fSSatish Balay         for (j=0; j<bs; j++){
118249b5e25fSSatish Balay           v = a->a + i*bs2 + j;
118349b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
11840b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
118549b5e25fSSatish Balay           }
118649b5e25fSSatish Balay         }
118749b5e25fSSatish Balay       }
118849b5e25fSSatish Balay       /* add k_th block row to il, jl */
1189831a3094SHong Zhang       col = aj+jmin;
1190831a3094SHong Zhang       if (*col == k) jmin++;
119149b5e25fSSatish Balay       if (jmin < jmax){
119249b5e25fSSatish Balay         il[k] = jmin;
11930b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
119449b5e25fSSatish Balay       }
119549b5e25fSSatish Balay       for (j=0; j<bs; j++){
119649b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
119749b5e25fSSatish Balay       }
119849b5e25fSSatish Balay     }
1199*74ed9c26SBarry Smith     ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr);
120049b5e25fSSatish Balay   } else {
1201347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
120249b5e25fSSatish Balay   }
120349b5e25fSSatish Balay   PetscFunctionReturn(0);
120449b5e25fSSatish Balay }
120549b5e25fSSatish Balay 
12064a2ae208SSatish Balay #undef __FUNCT__
12074a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1208dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
120949b5e25fSSatish Balay {
121049b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1211dfbe8321SBarry Smith   PetscErrorCode ierr;
121249b5e25fSSatish Balay 
121349b5e25fSSatish Balay   PetscFunctionBegin;
121449b5e25fSSatish Balay 
121549b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1216d0f46423SBarry Smith   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1217ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1218ef511fbeSHong Zhang     PetscFunctionReturn(0);
121949b5e25fSSatish Balay   }
122049b5e25fSSatish Balay 
122149b5e25fSSatish Balay   /* if the a->i are the same */
122213f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1223abc0a331SBarry Smith   if (!*flg) {
122449b5e25fSSatish Balay     PetscFunctionReturn(0);
122549b5e25fSSatish Balay   }
122649b5e25fSSatish Balay 
122749b5e25fSSatish Balay   /* if a->j are the same */
122813f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1229abc0a331SBarry Smith   if (!*flg) {
123049b5e25fSSatish Balay     PetscFunctionReturn(0);
123149b5e25fSSatish Balay   }
123249b5e25fSSatish Balay   /* if a->a are the same */
1233d0f46423SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1234935af2e7SHong Zhang   PetscFunctionReturn(0);
123549b5e25fSSatish Balay }
123649b5e25fSSatish Balay 
12374a2ae208SSatish Balay #undef __FUNCT__
12384a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1239dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
124049b5e25fSSatish Balay {
124149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1242dfbe8321SBarry Smith   PetscErrorCode ierr;
124313f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
124487828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
124549b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
124649b5e25fSSatish Balay 
124749b5e25fSSatish Balay   PetscFunctionBegin;
1248d0f46423SBarry Smith   bs   = A->rmap->bs;
124982799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
125082799104SHong Zhang 
125149b5e25fSSatish Balay   aa   = a->a;
125249b5e25fSSatish Balay   ai   = a->i;
125349b5e25fSSatish Balay   aj   = a->j;
125449b5e25fSSatish Balay   ambs = a->mbs;
125549b5e25fSSatish Balay   bs2  = a->bs2;
125649b5e25fSSatish Balay 
12572dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12581ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
125949b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1260d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
126149b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
126249b5e25fSSatish Balay     j=ai[i];
126349b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
126449b5e25fSSatish Balay       row  = i*bs;
126549b5e25fSSatish Balay       aa_j = aa + j*bs2;
126682799104SHong Zhang       if (A->factor && bs==1){
126782799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
126882799104SHong Zhang       } else {
126949b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
127049b5e25fSSatish Balay       }
127149b5e25fSSatish Balay     }
127282799104SHong Zhang   }
127382799104SHong Zhang 
12741ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
127549b5e25fSSatish Balay   PetscFunctionReturn(0);
127649b5e25fSSatish Balay }
127749b5e25fSSatish Balay 
12784a2ae208SSatish Balay #undef __FUNCT__
12794a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1280dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
128149b5e25fSSatish Balay {
128249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
12835e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
128449b5e25fSSatish Balay   MatScalar      *aa,*v;
1285dfbe8321SBarry Smith   PetscErrorCode ierr;
12865e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1287b3bf805bSHong Zhang   PetscTruth     flg;
128849b5e25fSSatish Balay 
128949b5e25fSSatish Balay   PetscFunctionBegin;
1290b3bf805bSHong Zhang   if (ll != rr){
1291b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1292b3bf805bSHong Zhang     if (!flg)
1293b3bf805bSHong Zhang       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1294b3bf805bSHong Zhang   }
1295b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
129649b5e25fSSatish Balay   ai  = a->i;
129749b5e25fSSatish Balay   aj  = a->j;
129849b5e25fSSatish Balay   aa  = a->a;
1299d0f46423SBarry Smith   m   = A->rmap->N;
1300d0f46423SBarry Smith   bs  = A->rmap->bs;
130149b5e25fSSatish Balay   mbs = a->mbs;
130249b5e25fSSatish Balay   bs2 = a->bs2;
130349b5e25fSSatish Balay 
13041ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
130549b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1306347d480fSBarry Smith   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
130749b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
130849b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
130949b5e25fSSatish Balay     li = l + i*bs;
131049b5e25fSSatish Balay     v  = aa + bs2*ai[i];
131149b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
131249b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13135e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
131449b5e25fSSatish Balay         x = ri[k];
131549b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
131649b5e25fSSatish Balay       }
131749b5e25fSSatish Balay     }
131849b5e25fSSatish Balay   }
13191ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1320dc0b31edSSatish Balay   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
132149b5e25fSSatish Balay   PetscFunctionReturn(0);
132249b5e25fSSatish Balay }
132349b5e25fSSatish Balay 
13244a2ae208SSatish Balay #undef __FUNCT__
13254a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1326dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
132749b5e25fSSatish Balay {
132849b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
132949b5e25fSSatish Balay 
133049b5e25fSSatish Balay   PetscFunctionBegin;
133149b5e25fSSatish Balay   info->block_size     = a->bs2;
13326c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13336c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
133449b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
133549b5e25fSSatish Balay   info->assemblies   = A->num_ass;
133649b5e25fSSatish Balay   info->mallocs      = a->reallocs;
13377adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
133849b5e25fSSatish Balay   if (A->factor) {
133949b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
134049b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
134149b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
134249b5e25fSSatish Balay   } else {
134349b5e25fSSatish Balay     info->fill_ratio_given  = 0;
134449b5e25fSSatish Balay     info->fill_ratio_needed = 0;
134549b5e25fSSatish Balay     info->factor_mallocs    = 0;
134649b5e25fSSatish Balay   }
134749b5e25fSSatish Balay   PetscFunctionReturn(0);
134849b5e25fSSatish Balay }
134949b5e25fSSatish Balay 
135049b5e25fSSatish Balay 
13514a2ae208SSatish Balay #undef __FUNCT__
13524a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1353dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
135449b5e25fSSatish Balay {
135549b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1356dfbe8321SBarry Smith   PetscErrorCode ierr;
135749b5e25fSSatish Balay 
135849b5e25fSSatish Balay   PetscFunctionBegin;
135949b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
136049b5e25fSSatish Balay   PetscFunctionReturn(0);
136149b5e25fSSatish Balay }
1362dc354874SHong Zhang 
13634a2ae208SSatish Balay #undef __FUNCT__
1364985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1365985db425SBarry Smith /*
1366985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1367985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1368985db425SBarry Smith */
1369985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1370dc354874SHong Zhang {
1371dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1372dfbe8321SBarry Smith   PetscErrorCode ierr;
137313f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1374c3fca9a7SHong Zhang   PetscReal      atmp;
1375273d9f13SBarry Smith   MatScalar      *aa;
1376985db425SBarry Smith   PetscScalar    *x;
137713f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1378dc354874SHong Zhang 
1379dc354874SHong Zhang   PetscFunctionBegin;
1380985db425SBarry Smith   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1381dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1382d0f46423SBarry Smith   bs   = A->rmap->bs;
1383dc354874SHong Zhang   aa   = a->a;
1384dc354874SHong Zhang   ai   = a->i;
1385dc354874SHong Zhang   aj   = a->j;
138644117c81SHong Zhang   mbs = a->mbs;
1387dc354874SHong Zhang 
1388985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
13891ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1390dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1391d0f46423SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
139244117c81SHong Zhang   for (i=0; i<mbs; i++) {
1393d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1394d0f6400bSHong Zhang     brow  = bs*i;
139544117c81SHong Zhang     for (j=0; j<ncols; j++){
1396d0f6400bSHong Zhang       bcol = bs*(*aj);
139744117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1398d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
139944117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1400d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1401d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1402c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1403c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
140444117c81SHong Zhang         }
140544117c81SHong Zhang       }
1406d0f6400bSHong Zhang       aj++;
1407dc354874SHong Zhang     }
1408dc354874SHong Zhang   }
14091ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1410dc354874SHong Zhang   PetscFunctionReturn(0);
1411dc354874SHong Zhang }
1412