xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 7adad9577d7f34aa90d02da3975546c0571d04a7)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
449b5e25fSSatish Balay #include "src/inline/spops.h"
549b5e25fSSatish Balay #include "src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
15521d7252SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
16521d7252SBarry Smith   PetscBT        table,table0;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19b3bf805bSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23899cda47SBarry Smith   bs  = A->rmap.bs;
24d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26899cda47SBarry Smith   ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
27d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n ; ++j){
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
48d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
52d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
54efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end ; l++){
61d94109b8SHong Zhang             bcol = aj[l];
62d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
63d94109b8SHong Zhang           }
64d94109b8SHong Zhang           k++;
65d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67d94109b8SHong Zhang           for (l = start; l<end ; l++){
68d94109b8SHong Zhang             bcol = aj[l];
69dbe03f88SHong Zhang             if (bcol > bcol_max) break;
70d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
71d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
72d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
73d94109b8SHong Zhang             }
74d94109b8SHong Zhang           }
75d94109b8SHong Zhang         }
76d94109b8SHong Zhang       }
77d94109b8SHong Zhang     } /* for each overlap */
78d94109b8SHong Zhang 
79d94109b8SHong Zhang     /* expand the Index Set */
80d94109b8SHong Zhang     for (j=0; j<isz; j++) {
81d94109b8SHong Zhang       for (k=0; k<bs; k++)
82d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
83d94109b8SHong Zhang     }
84d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
85d94109b8SHong Zhang   }
86d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
87d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
905eee224dSHong Zhang   PetscFunctionReturn(0);
9149b5e25fSSatish Balay }
9249b5e25fSSatish Balay 
934a2ae208SSatish Balay #undef __FUNCT__
944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
9513f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
9649b5e25fSSatish Balay {
9749b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
986849ba73SBarry Smith   PetscErrorCode ierr;
9913f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10013f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
101899cda47SBarry Smith   PetscInt       *irow,nrows,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
10213f74950SBarry Smith   PetscInt       *aj = a->j,*ai = a->i;
10349b5e25fSSatish Balay   MatScalar      *mat_a;
10449b5e25fSSatish Balay   Mat            C;
10549b5e25fSSatish Balay   PetscTruth     flag;
10649b5e25fSSatish Balay 
10749b5e25fSSatish Balay   PetscFunctionBegin;
108e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
10949b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
110347d480fSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11149b5e25fSSatish Balay 
11249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11449b5e25fSSatish Balay 
11513f74950SBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11649b5e25fSSatish Balay   ssmap = smap;
11713f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11813f74950SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11949b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12049b5e25fSSatish Balay   /* determine lens of each row */
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12249b5e25fSSatish Balay     kstart  = ai[irow[i]];
12349b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12449b5e25fSSatish Balay     lens[i] = 0;
12549b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12649b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12749b5e25fSSatish Balay           lens[i]++;
12849b5e25fSSatish Balay         }
12949b5e25fSSatish Balay       }
13049b5e25fSSatish Balay     }
13149b5e25fSSatish Balay   /* Create and fill new matrix */
13249b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13349b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13449b5e25fSSatish Balay 
135899cda47SBarry Smith     if (c->mbs!=nrows || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13613f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
137abc0a331SBarry Smith     if (!flag) {
138347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13949b5e25fSSatish Balay     }
14013f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14149b5e25fSSatish Balay     C = *B;
14249b5e25fSSatish Balay   } else {
143*7adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
144f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
145*7adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
146ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14749b5e25fSSatish Balay   }
14849b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14949b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15049b5e25fSSatish Balay     row    = irow[i];
15149b5e25fSSatish Balay     kstart = ai[row];
15249b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15349b5e25fSSatish Balay     mat_i  = c->i[i];
15449b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15549b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15649b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15749b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15849b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15949b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16049b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16149b5e25fSSatish Balay         mat_a   += bs2;
16249b5e25fSSatish Balay         (*mat_ilen)++;
16349b5e25fSSatish Balay       }
16449b5e25fSSatish Balay     }
16549b5e25fSSatish Balay   }
16649b5e25fSSatish Balay 
16749b5e25fSSatish Balay   /* Free work space */
16849b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay 
17349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17449b5e25fSSatish Balay   *B = C;
17549b5e25fSSatish Balay   PetscFunctionReturn(0);
17649b5e25fSSatish Balay }
17749b5e25fSSatish Balay 
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
18013f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
18149b5e25fSSatish Balay {
18249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18349b5e25fSSatish Balay   IS             is1;
1846849ba73SBarry Smith   PetscErrorCode ierr;
185899cda47SBarry Smith   PetscInt       *vary,*iary,*irow,nrows,i,bs=A->rmap.bs,count;
18649b5e25fSSatish Balay 
18749b5e25fSSatish Balay   PetscFunctionBegin;
188e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
18949b5e25fSSatish Balay 
19049b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19149b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19449b5e25fSSatish Balay    and form the IS with compressed IS */
19513f74950SBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
19649b5e25fSSatish Balay   iary = vary + a->mbs;
19713f74950SBarry Smith   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
19849b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19949b5e25fSSatish Balay 
20049b5e25fSSatish Balay   count = 0;
20149b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
202e005ede5SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20349b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20449b5e25fSSatish Balay   }
20549b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20649b5e25fSSatish Balay 
20749b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20849b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
20949b5e25fSSatish Balay 
21049b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
21149b5e25fSSatish Balay   ISDestroy(is1);
21249b5e25fSSatish Balay   PetscFunctionReturn(0);
21349b5e25fSSatish Balay }
21449b5e25fSSatish Balay 
2154a2ae208SSatish Balay #undef __FUNCT__
2164a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21713f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21849b5e25fSSatish Balay {
2196849ba73SBarry Smith   PetscErrorCode ierr;
22013f74950SBarry Smith   PetscInt       i;
22149b5e25fSSatish Balay 
22249b5e25fSSatish Balay   PetscFunctionBegin;
22349b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22482502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22549b5e25fSSatish Balay   }
22649b5e25fSSatish Balay 
22749b5e25fSSatish Balay   for (i=0; i<n; i++) {
22849b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
22949b5e25fSSatish Balay   }
23049b5e25fSSatish Balay   PetscFunctionReturn(0);
23149b5e25fSSatish Balay }
23249b5e25fSSatish Balay 
23349b5e25fSSatish Balay /* -------------------------------------------------------*/
23449b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23549b5e25fSSatish Balay /* -------------------------------------------------------*/
236d9eff348SSatish Balay #include "petscblaslapack.h"
23749b5e25fSSatish Balay 
2384a2ae208SSatish Balay #undef __FUNCT__
2394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
240dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
24149b5e25fSSatish Balay {
24249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,zero=0.0;
24449b5e25fSSatish Balay   MatScalar      *v;
2456849ba73SBarry Smith   PetscErrorCode ierr;
24613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24749b5e25fSSatish Balay 
24849b5e25fSSatish Balay   PetscFunctionBegin;
2492dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25249b5e25fSSatish Balay 
25349b5e25fSSatish Balay   v  = a->a;
25449b5e25fSSatish Balay   xb = x;
25549b5e25fSSatish Balay 
25649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25749b5e25fSSatish Balay     n    = ai[1] - ai[0];  /* length of i_th row of A */
2583cb60589SBarry Smith     x1   = *xb++;
2593cb60589SBarry Smith     ib   = aj + *ai++;
260831a3094SHong Zhang     jmin = 0;
2613cb60589SBarry Smith     /* if we ALWAYS required a diagonal entry then could remove this if test */
2623cb60589SBarry Smith     /* should we use a tmp to hold the accumulated z[i] */
263831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
264831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
265831a3094SHong Zhang       jmin++;
266831a3094SHong Zhang     }
267831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26849b5e25fSSatish Balay       cval    = *ib;
26949b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
27049b5e25fSSatish Balay       z[i]    += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
27149b5e25fSSatish Balay     }
27249b5e25fSSatish Balay   }
27349b5e25fSSatish Balay 
2741ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2751ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
276899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);  /* nz = (nz+m)/2 */
27749b5e25fSSatish Balay   PetscFunctionReturn(0);
27849b5e25fSSatish Balay }
27949b5e25fSSatish Balay 
2804a2ae208SSatish Balay #undef __FUNCT__
2814a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
282dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
28349b5e25fSSatish Balay {
28449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
28587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
28649b5e25fSSatish Balay   MatScalar      *v;
2876849ba73SBarry Smith   PetscErrorCode ierr;
28813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
28949b5e25fSSatish Balay 
29049b5e25fSSatish Balay   PetscFunctionBegin;
2912dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
29449b5e25fSSatish Balay 
29549b5e25fSSatish Balay   v     = a->a;
29649b5e25fSSatish Balay   xb = x;
29749b5e25fSSatish Balay 
29849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
29949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
30149b5e25fSSatish Balay     ib = aj + *ai;
302831a3094SHong Zhang     jmin = 0;
3037fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
30449b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
30549b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
306831a3094SHong Zhang       v += 4; jmin++;
3077fbae186SHong Zhang     }
308831a3094SHong Zhang     for (j=jmin; j<n; j++) {
30949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31049b5e25fSSatish Balay       cval       = ib[j]*2;
31149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
31249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
31349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
31449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
31549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
31649b5e25fSSatish Balay       v  += 4;
31749b5e25fSSatish Balay     }
31849b5e25fSSatish Balay     xb +=2; ai++;
31949b5e25fSSatish Balay   }
32049b5e25fSSatish Balay 
3211ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3221ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
323899cda47SBarry Smith   ierr = PetscLogFlops(8*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
32449b5e25fSSatish Balay   PetscFunctionReturn(0);
32549b5e25fSSatish Balay }
32649b5e25fSSatish Balay 
3274a2ae208SSatish Balay #undef __FUNCT__
3284a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
329dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
33049b5e25fSSatish Balay {
33149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
33287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
33349b5e25fSSatish Balay   MatScalar      *v;
3346849ba73SBarry Smith   PetscErrorCode ierr;
33513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
33649b5e25fSSatish Balay 
33749b5e25fSSatish Balay   PetscFunctionBegin;
3382dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3391ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3401ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34149b5e25fSSatish Balay 
34249b5e25fSSatish Balay   v    = a->a;
34349b5e25fSSatish Balay   xb   = x;
34449b5e25fSSatish Balay 
34549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
34649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
34749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
34849b5e25fSSatish Balay     ib = aj + *ai;
349831a3094SHong Zhang     jmin = 0;
3507fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
35149b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
35249b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
35349b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
354831a3094SHong Zhang       v += 9; jmin++;
3557fbae186SHong Zhang     }
356831a3094SHong Zhang     for (j=jmin; j<n; j++) {
35749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
35849b5e25fSSatish Balay       cval       = ib[j]*3;
35949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
36049b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
36149b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
36249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
36349b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
36449b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
36549b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
36649b5e25fSSatish Balay       v  += 9;
36749b5e25fSSatish Balay     }
36849b5e25fSSatish Balay     xb +=3; ai++;
36949b5e25fSSatish Balay   }
37049b5e25fSSatish Balay 
3711ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3721ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
373899cda47SBarry Smith   ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
37449b5e25fSSatish Balay   PetscFunctionReturn(0);
37549b5e25fSSatish Balay }
37649b5e25fSSatish Balay 
3774a2ae208SSatish Balay #undef __FUNCT__
3784a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
379dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
38049b5e25fSSatish Balay {
38149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
38349b5e25fSSatish Balay   MatScalar      *v;
3846849ba73SBarry Smith   PetscErrorCode ierr;
38513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
38649b5e25fSSatish Balay 
38749b5e25fSSatish Balay   PetscFunctionBegin;
3882dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3891ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
39149b5e25fSSatish Balay 
39249b5e25fSSatish Balay   v     = a->a;
39349b5e25fSSatish Balay   xb = x;
39449b5e25fSSatish Balay 
39549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
39649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
39749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
39849b5e25fSSatish Balay     ib = aj + *ai;
399831a3094SHong Zhang     jmin = 0;
4007fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
40149b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
40249b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
40349b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
40449b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
405831a3094SHong Zhang       v += 16; jmin++;
4067fbae186SHong Zhang     }
407831a3094SHong Zhang     for (j=jmin; j<n; j++) {
40849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
40949b5e25fSSatish Balay       cval       = ib[j]*4;
41049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
41149b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
41249b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
41349b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
41449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
41549b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
41649b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
41749b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
41849b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
41949b5e25fSSatish Balay       v  += 16;
42049b5e25fSSatish Balay     }
42149b5e25fSSatish Balay     xb +=4; ai++;
42249b5e25fSSatish Balay   }
42349b5e25fSSatish Balay 
4241ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4251ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
426899cda47SBarry Smith   ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
42749b5e25fSSatish Balay   PetscFunctionReturn(0);
42849b5e25fSSatish Balay }
42949b5e25fSSatish Balay 
4304a2ae208SSatish Balay #undef __FUNCT__
4314a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
432dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
43349b5e25fSSatish Balay {
43449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
43587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
43649b5e25fSSatish Balay   MatScalar      *v;
4376849ba73SBarry Smith   PetscErrorCode ierr;
43813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
43949b5e25fSSatish Balay 
44049b5e25fSSatish Balay   PetscFunctionBegin;
4412dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4421ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
44449b5e25fSSatish Balay 
44549b5e25fSSatish Balay   v     = a->a;
44649b5e25fSSatish Balay   xb = x;
44749b5e25fSSatish Balay 
44849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
44949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
45049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
45149b5e25fSSatish Balay     ib = aj + *ai;
452831a3094SHong Zhang     jmin = 0;
4537fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
45449b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
45549b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
45649b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
45749b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
45849b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
459831a3094SHong Zhang       v += 25; jmin++;
4607fbae186SHong Zhang     }
461831a3094SHong Zhang     for (j=jmin; j<n; j++) {
46249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
46349b5e25fSSatish Balay       cval       = ib[j]*5;
46449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
46549b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
46649b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
46749b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
46849b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
46949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
47049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
47149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
47249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
47349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
47449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
47549b5e25fSSatish Balay       v  += 25;
47649b5e25fSSatish Balay     }
47749b5e25fSSatish Balay     xb +=5; ai++;
47849b5e25fSSatish Balay   }
47949b5e25fSSatish Balay 
4801ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4811ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
482899cda47SBarry Smith   ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
48349b5e25fSSatish Balay   PetscFunctionReturn(0);
48449b5e25fSSatish Balay }
48549b5e25fSSatish Balay 
48649b5e25fSSatish Balay 
4874a2ae208SSatish Balay #undef __FUNCT__
4884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
489dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
49049b5e25fSSatish Balay {
49149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
49287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
49349b5e25fSSatish Balay   MatScalar      *v;
4946849ba73SBarry Smith   PetscErrorCode ierr;
49513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
49649b5e25fSSatish Balay 
49749b5e25fSSatish Balay   PetscFunctionBegin;
4982dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4991ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
50149b5e25fSSatish Balay 
50249b5e25fSSatish Balay   v     = a->a;
50349b5e25fSSatish Balay   xb = x;
50449b5e25fSSatish Balay 
50549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
50649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
50749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
50849b5e25fSSatish Balay     ib = aj + *ai;
509831a3094SHong Zhang     jmin = 0;
5107fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
51149b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
51249b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
51349b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
51449b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
51549b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
51649b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
517831a3094SHong Zhang       v += 36; jmin++;
5187fbae186SHong Zhang     }
519831a3094SHong Zhang     for (j=jmin; j<n; j++) {
52049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
52149b5e25fSSatish Balay       cval       = ib[j]*6;
52249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
52349b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
52449b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
52549b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
52649b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
52749b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
52849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
52949b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
53049b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
53149b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
53249b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
53349b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
53449b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
53549b5e25fSSatish Balay       v  += 36;
53649b5e25fSSatish Balay     }
53749b5e25fSSatish Balay     xb +=6; ai++;
53849b5e25fSSatish Balay   }
53949b5e25fSSatish Balay 
5401ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5411ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
542899cda47SBarry Smith   ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
54349b5e25fSSatish Balay   PetscFunctionReturn(0);
54449b5e25fSSatish Balay }
5454a2ae208SSatish Balay #undef __FUNCT__
5464a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
547dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
54849b5e25fSSatish Balay {
54949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
55087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
55149b5e25fSSatish Balay   MatScalar      *v;
5526849ba73SBarry Smith   PetscErrorCode ierr;
55313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
55449b5e25fSSatish Balay 
55549b5e25fSSatish Balay   PetscFunctionBegin;
5562dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
55949b5e25fSSatish Balay 
56049b5e25fSSatish Balay   v     = a->a;
56149b5e25fSSatish Balay   xb = x;
56249b5e25fSSatish Balay 
56349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
56449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
56549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
56649b5e25fSSatish Balay     ib = aj + *ai;
567831a3094SHong Zhang     jmin = 0;
5687fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
56949b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
57049b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
57149b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
57249b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
57349b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
57449b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
57549b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
576831a3094SHong Zhang       v += 49; jmin++;
5777fbae186SHong Zhang     }
578831a3094SHong Zhang     for (j=jmin; j<n; j++) {
57949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
58049b5e25fSSatish Balay       cval       = ib[j]*7;
58149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
58249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
58349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
58449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
58549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
58649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
58749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
58849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
58949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
59049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
59149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
59249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
59349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
59449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
59549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
59649b5e25fSSatish Balay       v  += 49;
59749b5e25fSSatish Balay     }
59849b5e25fSSatish Balay     xb +=7; ai++;
59949b5e25fSSatish Balay   }
6001ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6011ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
602899cda47SBarry Smith   ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
60349b5e25fSSatish Balay   PetscFunctionReturn(0);
60449b5e25fSSatish Balay }
60549b5e25fSSatish Balay 
60649b5e25fSSatish Balay /*
60749b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
60849b5e25fSSatish Balay */
6094a2ae208SSatish Balay #undef __FUNCT__
6104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
611dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
61249b5e25fSSatish Balay {
61349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
61487828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6150b60a74dSHong Zhang   MatScalar      *v;
616dfbe8321SBarry Smith   PetscErrorCode ierr;
617899cda47SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
61849b5e25fSSatish Balay 
61949b5e25fSSatish Balay   PetscFunctionBegin;
6202dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6211ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6221ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
62349b5e25fSSatish Balay 
62449b5e25fSSatish Balay   aj   = a->j;
62549b5e25fSSatish Balay   v    = a->a;
62649b5e25fSSatish Balay   ii   = a->i;
62749b5e25fSSatish Balay 
62849b5e25fSSatish Balay   if (!a->mult_work) {
629899cda47SBarry Smith     ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
63049b5e25fSSatish Balay   }
63149b5e25fSSatish Balay   work = a->mult_work;
63249b5e25fSSatish Balay 
63349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
63449b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
63549b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
63649b5e25fSSatish Balay 
63749b5e25fSSatish Balay     /* upper triangular part */
63849b5e25fSSatish Balay     for (j=0; j<n; j++) {
63949b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
64049b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
64149b5e25fSSatish Balay       workt += bs;
64249b5e25fSSatish Balay     }
64349b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
64449b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
64549b5e25fSSatish Balay 
64649b5e25fSSatish Balay     /* strict lower triangular part */
647831a3094SHong Zhang     idx = aj+ii[0];
648831a3094SHong Zhang     if (*idx == i){
64996b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
650831a3094SHong Zhang     }
65196b9376eSHong Zhang 
65249b5e25fSSatish Balay     if (ncols > 0){
65349b5e25fSSatish Balay       workt = work;
65487828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
655831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
656831a3094SHong Zhang       for (j=0; j<n; j++) {
657831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
65849b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
65949b5e25fSSatish Balay         workt += bs;
66049b5e25fSSatish Balay       }
66149b5e25fSSatish Balay     }
66249b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
66349b5e25fSSatish Balay   }
66449b5e25fSSatish Balay 
6651ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6661ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
667899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N)*bs2 - A->rmap.N);CHKERRQ(ierr);
66849b5e25fSSatish Balay   PetscFunctionReturn(0);
66949b5e25fSSatish Balay }
67049b5e25fSSatish Balay 
6716a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6724a2ae208SSatish Balay #undef __FUNCT__
6734a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
674dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
67549b5e25fSSatish Balay {
67649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
677bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
67849b5e25fSSatish Balay   MatScalar      *v;
6796849ba73SBarry Smith   PetscErrorCode ierr;
68013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
68149b5e25fSSatish Balay 
68249b5e25fSSatish Balay   PetscFunctionBegin;
6836a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6841ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
68649b5e25fSSatish Balay   v  = a->a;
68749b5e25fSSatish Balay   xb = x;
68849b5e25fSSatish Balay 
68949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
69049b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
69149b5e25fSSatish Balay     x1 = xb[0];
69249b5e25fSSatish Balay     ib = aj + *ai;
693831a3094SHong Zhang     jmin = 0;
694831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
695831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
696831a3094SHong Zhang     }
697831a3094SHong Zhang     for (j=jmin; j<n; j++) {
69849b5e25fSSatish Balay       cval    = *ib;
69949b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
70049b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
70149b5e25fSSatish Balay     }
70249b5e25fSSatish Balay     xb++; ai++;
70349b5e25fSSatish Balay   }
70449b5e25fSSatish Balay 
7051ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
706bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
70749b5e25fSSatish Balay 
708899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
70949b5e25fSSatish Balay   PetscFunctionReturn(0);
71049b5e25fSSatish Balay }
71149b5e25fSSatish Balay 
7124a2ae208SSatish Balay #undef __FUNCT__
7134a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
714dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
71549b5e25fSSatish Balay {
71649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
717bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
71849b5e25fSSatish Balay   MatScalar      *v;
7196849ba73SBarry Smith   PetscErrorCode ierr;
72013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
72149b5e25fSSatish Balay 
72249b5e25fSSatish Balay   PetscFunctionBegin;
7236a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7241ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7251ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
72649b5e25fSSatish Balay 
72749b5e25fSSatish Balay   v  = a->a;
72849b5e25fSSatish Balay   xb = x;
72949b5e25fSSatish Balay 
73049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
73149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
73249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
73349b5e25fSSatish Balay     ib = aj + *ai;
734831a3094SHong Zhang     jmin = 0;
7357fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
73649b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
73749b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
738831a3094SHong Zhang       v += 4; jmin++;
7397fbae186SHong Zhang     }
740831a3094SHong Zhang     for (j=jmin; j<n; j++) {
74149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
74249b5e25fSSatish Balay       cval       = ib[j]*2;
74349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
74449b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
74549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
74649b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
74749b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
74849b5e25fSSatish Balay       v  += 4;
74949b5e25fSSatish Balay     }
75049b5e25fSSatish Balay     xb +=2; ai++;
75149b5e25fSSatish Balay   }
7521ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
753bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
75449b5e25fSSatish Balay 
755899cda47SBarry Smith   ierr = PetscLogFlops(4*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
75649b5e25fSSatish Balay   PetscFunctionReturn(0);
75749b5e25fSSatish Balay }
75849b5e25fSSatish Balay 
7594a2ae208SSatish Balay #undef __FUNCT__
7604a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
761dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
76249b5e25fSSatish Balay {
76349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
764bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
76549b5e25fSSatish Balay   MatScalar      *v;
7666849ba73SBarry Smith   PetscErrorCode ierr;
76713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
76849b5e25fSSatish Balay 
76949b5e25fSSatish Balay   PetscFunctionBegin;
7706a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7711ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7721ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
77349b5e25fSSatish Balay 
77449b5e25fSSatish Balay   v     = a->a;
77549b5e25fSSatish Balay   xb = x;
77649b5e25fSSatish Balay 
77749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
77849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
77949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
78049b5e25fSSatish Balay     ib = aj + *ai;
781831a3094SHong Zhang     jmin = 0;
7827fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
78349b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
78449b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
78549b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
786831a3094SHong Zhang      v += 9; jmin++;
7877fbae186SHong Zhang     }
788831a3094SHong Zhang     for (j=jmin; j<n; j++) {
78949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
79049b5e25fSSatish Balay       cval       = ib[j]*3;
79149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
79249b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
79349b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
79449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
79549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
79649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
79749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
79849b5e25fSSatish Balay       v  += 9;
79949b5e25fSSatish Balay     }
80049b5e25fSSatish Balay     xb +=3; ai++;
80149b5e25fSSatish Balay   }
80249b5e25fSSatish Balay 
8031ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
80549b5e25fSSatish Balay 
806899cda47SBarry Smith   ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
80749b5e25fSSatish Balay   PetscFunctionReturn(0);
80849b5e25fSSatish Balay }
80949b5e25fSSatish Balay 
8104a2ae208SSatish Balay #undef __FUNCT__
8114a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
812dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
81349b5e25fSSatish Balay {
81449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
815bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
81649b5e25fSSatish Balay   MatScalar      *v;
8176849ba73SBarry Smith   PetscErrorCode ierr;
81813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
81949b5e25fSSatish Balay 
82049b5e25fSSatish Balay   PetscFunctionBegin;
8216a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8221ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8231ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
82449b5e25fSSatish Balay 
82549b5e25fSSatish Balay   v     = a->a;
82649b5e25fSSatish Balay   xb = x;
82749b5e25fSSatish Balay 
82849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
82949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
83049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
83149b5e25fSSatish Balay     ib = aj + *ai;
832831a3094SHong Zhang     jmin = 0;
8337fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
83449b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
83549b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
83649b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
83749b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
838831a3094SHong Zhang       v += 16; jmin++;
8397fbae186SHong Zhang     }
840831a3094SHong Zhang     for (j=jmin; j<n; j++) {
84149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
84249b5e25fSSatish Balay       cval       = ib[j]*4;
84349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
84449b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
84549b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
84649b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
84749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
84849b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
84949b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
85049b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
85149b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
85249b5e25fSSatish Balay       v  += 16;
85349b5e25fSSatish Balay     }
85449b5e25fSSatish Balay     xb +=4; ai++;
85549b5e25fSSatish Balay   }
85649b5e25fSSatish Balay 
8571ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
858bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
85949b5e25fSSatish Balay 
860899cda47SBarry Smith   ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
86149b5e25fSSatish Balay   PetscFunctionReturn(0);
86249b5e25fSSatish Balay }
86349b5e25fSSatish Balay 
8644a2ae208SSatish Balay #undef __FUNCT__
8654a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
866dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
86749b5e25fSSatish Balay {
86849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
869bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
87049b5e25fSSatish Balay   MatScalar      *v;
8716849ba73SBarry Smith   PetscErrorCode ierr;
87213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
87349b5e25fSSatish Balay 
87449b5e25fSSatish Balay   PetscFunctionBegin;
8756a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8761ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8771ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
87849b5e25fSSatish Balay 
87949b5e25fSSatish Balay   v     = a->a;
88049b5e25fSSatish Balay   xb = x;
88149b5e25fSSatish Balay 
88249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
88349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
88449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
88549b5e25fSSatish Balay     ib = aj + *ai;
886831a3094SHong Zhang     jmin = 0;
8877fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
88849b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
88949b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
89049b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
89149b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
89249b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
893831a3094SHong Zhang       v += 25; jmin++;
8947fbae186SHong Zhang     }
895831a3094SHong Zhang     for (j=jmin; j<n; j++) {
89649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
89749b5e25fSSatish Balay       cval       = ib[j]*5;
89849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
89949b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
90049b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
90149b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
90249b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
90349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
90449b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
90549b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
90649b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
90749b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
90849b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
90949b5e25fSSatish Balay       v  += 25;
91049b5e25fSSatish Balay     }
91149b5e25fSSatish Balay     xb +=5; ai++;
91249b5e25fSSatish Balay   }
91349b5e25fSSatish Balay 
9141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
915bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
91649b5e25fSSatish Balay 
917899cda47SBarry Smith   ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
91849b5e25fSSatish Balay   PetscFunctionReturn(0);
91949b5e25fSSatish Balay }
9204a2ae208SSatish Balay #undef __FUNCT__
9214a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
922dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
92349b5e25fSSatish Balay {
92449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
925bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
92649b5e25fSSatish Balay   MatScalar      *v;
9276849ba73SBarry Smith   PetscErrorCode ierr;
92813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
92949b5e25fSSatish Balay 
93049b5e25fSSatish Balay   PetscFunctionBegin;
9316a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9321ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
93449b5e25fSSatish Balay 
93549b5e25fSSatish Balay   v     = a->a;
93649b5e25fSSatish Balay   xb = x;
93749b5e25fSSatish Balay 
93849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
93949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
94049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
94149b5e25fSSatish Balay     ib = aj + *ai;
942831a3094SHong Zhang     jmin = 0;
9437fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
94449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
94549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
94649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
94749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
94849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
94949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
950831a3094SHong Zhang       v += 36; jmin++;
9517fbae186SHong Zhang     }
952831a3094SHong Zhang     for (j=jmin; j<n; j++) {
95349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
95449b5e25fSSatish Balay       cval       = ib[j]*6;
95549b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
95649b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
95749b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
95849b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
95949b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
96049b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
96149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96249b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
96349b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
96449b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
96549b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
96649b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
96749b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
96849b5e25fSSatish Balay       v  += 36;
96949b5e25fSSatish Balay     }
97049b5e25fSSatish Balay     xb +=6; ai++;
97149b5e25fSSatish Balay   }
97249b5e25fSSatish Balay 
9731ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
974bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
97549b5e25fSSatish Balay 
976899cda47SBarry Smith   ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
97749b5e25fSSatish Balay   PetscFunctionReturn(0);
97849b5e25fSSatish Balay }
97949b5e25fSSatish Balay 
9804a2ae208SSatish Balay #undef __FUNCT__
9814a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
982dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
98349b5e25fSSatish Balay {
98449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
985bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
98649b5e25fSSatish Balay   MatScalar      *v;
9876849ba73SBarry Smith   PetscErrorCode ierr;
98813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
98949b5e25fSSatish Balay 
99049b5e25fSSatish Balay   PetscFunctionBegin;
9916a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
99449b5e25fSSatish Balay 
99549b5e25fSSatish Balay   v     = a->a;
99649b5e25fSSatish Balay   xb = x;
99749b5e25fSSatish Balay 
99849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
99949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
100049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
100149b5e25fSSatish Balay     ib = aj + *ai;
1002831a3094SHong Zhang     jmin = 0;
10037fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
100449b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
100549b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
100649b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
100749b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
100849b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
100949b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
101049b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1011831a3094SHong Zhang       v += 49; jmin++;
10127fbae186SHong Zhang     }
1013831a3094SHong Zhang     for (j=jmin; j<n; j++) {
101449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
101549b5e25fSSatish Balay       cval       = ib[j]*7;
101649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
101749b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
101849b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
101949b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
102049b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
102149b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
102249b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
102349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
102449b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
102549b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
102649b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
102749b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
102849b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
102949b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
103049b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
103149b5e25fSSatish Balay       v  += 49;
103249b5e25fSSatish Balay     }
103349b5e25fSSatish Balay     xb +=7; ai++;
103449b5e25fSSatish Balay   }
103549b5e25fSSatish Balay 
10361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1037bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
103849b5e25fSSatish Balay 
1039899cda47SBarry Smith   ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
104049b5e25fSSatish Balay   PetscFunctionReturn(0);
104149b5e25fSSatish Balay }
104249b5e25fSSatish Balay 
10434a2ae208SSatish Balay #undef __FUNCT__
10444a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1045dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
104649b5e25fSSatish Balay {
104749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1048bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1049066653e3SSatish Balay   MatScalar      *v;
1050dfbe8321SBarry Smith   PetscErrorCode ierr;
1051899cda47SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
105249b5e25fSSatish Balay 
105349b5e25fSSatish Balay   PetscFunctionBegin;
10546a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10551ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10561ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
105749b5e25fSSatish Balay 
105849b5e25fSSatish Balay   aj   = a->j;
105949b5e25fSSatish Balay   v    = a->a;
106049b5e25fSSatish Balay   ii   = a->i;
106149b5e25fSSatish Balay 
106249b5e25fSSatish Balay   if (!a->mult_work) {
1063899cda47SBarry Smith     ierr = PetscMalloc((A->rmap.n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
106449b5e25fSSatish Balay   }
106549b5e25fSSatish Balay   work = a->mult_work;
106649b5e25fSSatish Balay 
106749b5e25fSSatish Balay 
106849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
106949b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
107049b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
107149b5e25fSSatish Balay 
107249b5e25fSSatish Balay     /* upper triangular part */
107349b5e25fSSatish Balay     for (j=0; j<n; j++) {
107449b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
107549b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
107649b5e25fSSatish Balay       workt += bs;
107749b5e25fSSatish Balay     }
107849b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
107949b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
108049b5e25fSSatish Balay 
108149b5e25fSSatish Balay     /* strict lower triangular part */
1082831a3094SHong Zhang     idx = aj+ii[0];
1083831a3094SHong Zhang     if (*idx == i){
108496b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1085831a3094SHong Zhang     }
108649b5e25fSSatish Balay     if (ncols > 0){
108749b5e25fSSatish Balay       workt = work;
108887828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1089831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1090831a3094SHong Zhang       for (j=0; j<n; j++) {
1091831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
109249b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
109349b5e25fSSatish Balay         workt += bs;
109449b5e25fSSatish Balay       }
109549b5e25fSSatish Balay     }
109649b5e25fSSatish Balay 
109749b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
109849b5e25fSSatish Balay   }
109949b5e25fSSatish Balay 
11001ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1101bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
110249b5e25fSSatish Balay 
1103899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
110449b5e25fSSatish Balay   PetscFunctionReturn(0);
110549b5e25fSSatish Balay }
110649b5e25fSSatish Balay 
11074a2ae208SSatish Balay #undef __FUNCT__
11084a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1109f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
111049b5e25fSSatish Balay {
111149b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1112f4df32b1SMatthew Knepley   PetscScalar oalpha = alpha;
11134ce68768SBarry Smith   PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;
1114efee365bSSatish Balay   PetscErrorCode ierr;
111549b5e25fSSatish Balay 
111649b5e25fSSatish Balay   PetscFunctionBegin;
1117f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1118efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
111949b5e25fSSatish Balay   PetscFunctionReturn(0);
112049b5e25fSSatish Balay }
112149b5e25fSSatish Balay 
11224a2ae208SSatish Balay #undef __FUNCT__
11234a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1124dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
112549b5e25fSSatish Balay {
112649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
112749b5e25fSSatish Balay   MatScalar      *v = a->a;
112849b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1129899cda47SBarry Smith   PetscInt       i,j,k,bs = A->rmap.bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11306849ba73SBarry Smith   PetscErrorCode ierr;
113113f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
113249b5e25fSSatish Balay 
113349b5e25fSSatish Balay   PetscFunctionBegin;
113449b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
113549b5e25fSSatish Balay     for (k=0; k<mbs; k++){
113649b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1137831a3094SHong Zhang       col  = aj + jmin;
1138831a3094SHong Zhang       if (*col == k){         /* diagonal block */
113949b5e25fSSatish Balay         for (i=0; i<bs2; i++){
114049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
114149b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
114249b5e25fSSatish Balay #else
114349b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
114449b5e25fSSatish Balay #endif
114549b5e25fSSatish Balay         }
1146831a3094SHong Zhang         jmin++;
1147831a3094SHong Zhang       }
1148831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
114949b5e25fSSatish Balay         for (i=0; i<bs2; i++){
115049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
115149b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
115249b5e25fSSatish Balay #else
115349b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
115449b5e25fSSatish Balay #endif
115549b5e25fSSatish Balay         }
115649b5e25fSSatish Balay       }
115749b5e25fSSatish Balay     }
115849b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11590b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
11600b8dc8d2SHong Zhang     ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr);
11610b8dc8d2SHong Zhang     jl   = il + mbs;
11620b8dc8d2SHong Zhang     sum  = (PetscReal*)(jl + mbs);
11630b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11640b8dc8d2SHong Zhang     il[0] = 0;
116549b5e25fSSatish Balay 
116649b5e25fSSatish Balay     *norm = 0.0;
116749b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
116849b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
116949b5e25fSSatish Balay       /*-- col sum --*/
117049b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1171831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1172831a3094SHong Zhang                   at step k */
117349b5e25fSSatish Balay       while (i<mbs){
117449b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
117549b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
117649b5e25fSSatish Balay         for (j=0; j<bs; j++){
117749b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
117849b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
117949b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
118049b5e25fSSatish Balay           }
118149b5e25fSSatish Balay         }
118249b5e25fSSatish Balay         /* update il, jl */
1183831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1184831a3094SHong Zhang         jmax = a->i[i+1];
118549b5e25fSSatish Balay         if (jmin < jmax){
118649b5e25fSSatish Balay           il[i] = jmin;
118749b5e25fSSatish Balay           j   = a->j[jmin];
118849b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
118949b5e25fSSatish Balay         }
119049b5e25fSSatish Balay         i = nexti;
119149b5e25fSSatish Balay       }
119249b5e25fSSatish Balay       /*-- row sum --*/
119349b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
119449b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
119549b5e25fSSatish Balay         for (j=0; j<bs; j++){
119649b5e25fSSatish Balay           v = a->a + i*bs2 + j;
119749b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
11980b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
119949b5e25fSSatish Balay           }
120049b5e25fSSatish Balay         }
120149b5e25fSSatish Balay       }
120249b5e25fSSatish Balay       /* add k_th block row to il, jl */
1203831a3094SHong Zhang       col = aj+jmin;
1204831a3094SHong Zhang       if (*col == k) jmin++;
120549b5e25fSSatish Balay       if (jmin < jmax){
120649b5e25fSSatish Balay         il[k] = jmin;
12070b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
120849b5e25fSSatish Balay       }
120949b5e25fSSatish Balay       for (j=0; j<bs; j++){
121049b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
121149b5e25fSSatish Balay       }
121249b5e25fSSatish Balay     }
121349b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
121449b5e25fSSatish Balay   } else {
1215347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
121649b5e25fSSatish Balay   }
121749b5e25fSSatish Balay   PetscFunctionReturn(0);
121849b5e25fSSatish Balay }
121949b5e25fSSatish Balay 
12204a2ae208SSatish Balay #undef __FUNCT__
12214a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1222dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
122349b5e25fSSatish Balay {
122449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1225dfbe8321SBarry Smith   PetscErrorCode ierr;
122649b5e25fSSatish Balay 
122749b5e25fSSatish Balay   PetscFunctionBegin;
122849b5e25fSSatish Balay 
122949b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1230899cda47SBarry Smith   if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1231ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1232ef511fbeSHong Zhang     PetscFunctionReturn(0);
123349b5e25fSSatish Balay   }
123449b5e25fSSatish Balay 
123549b5e25fSSatish Balay   /* if the a->i are the same */
123613f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1237abc0a331SBarry Smith   if (!*flg) {
123849b5e25fSSatish Balay     PetscFunctionReturn(0);
123949b5e25fSSatish Balay   }
124049b5e25fSSatish Balay 
124149b5e25fSSatish Balay   /* if a->j are the same */
124213f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1243abc0a331SBarry Smith   if (!*flg) {
124449b5e25fSSatish Balay     PetscFunctionReturn(0);
124549b5e25fSSatish Balay   }
124649b5e25fSSatish Balay   /* if a->a are the same */
1247899cda47SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(A->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1248935af2e7SHong Zhang   PetscFunctionReturn(0);
124949b5e25fSSatish Balay }
125049b5e25fSSatish Balay 
12514a2ae208SSatish Balay #undef __FUNCT__
12524a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1253dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
125449b5e25fSSatish Balay {
125549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1256dfbe8321SBarry Smith   PetscErrorCode ierr;
125713f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
125887828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
125949b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
126049b5e25fSSatish Balay 
126149b5e25fSSatish Balay   PetscFunctionBegin;
1262899cda47SBarry Smith   bs   = A->rmap.bs;
126382799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
126482799104SHong Zhang 
126549b5e25fSSatish Balay   aa   = a->a;
126649b5e25fSSatish Balay   ai   = a->i;
126749b5e25fSSatish Balay   aj   = a->j;
126849b5e25fSSatish Balay   ambs = a->mbs;
126949b5e25fSSatish Balay   bs2  = a->bs2;
127049b5e25fSSatish Balay 
12712dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12721ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
127349b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1274899cda47SBarry Smith   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
127549b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
127649b5e25fSSatish Balay     j=ai[i];
127749b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
127849b5e25fSSatish Balay       row  = i*bs;
127949b5e25fSSatish Balay       aa_j = aa + j*bs2;
128082799104SHong Zhang       if (A->factor && bs==1){
128182799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
128282799104SHong Zhang       } else {
128349b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
128449b5e25fSSatish Balay       }
128549b5e25fSSatish Balay     }
128682799104SHong Zhang   }
128782799104SHong Zhang 
12881ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
128949b5e25fSSatish Balay   PetscFunctionReturn(0);
129049b5e25fSSatish Balay }
129149b5e25fSSatish Balay 
12924a2ae208SSatish Balay #undef __FUNCT__
12934a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1294dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
129549b5e25fSSatish Balay {
129649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
12975e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
129849b5e25fSSatish Balay   MatScalar      *aa,*v;
1299dfbe8321SBarry Smith   PetscErrorCode ierr;
13005e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1301b3bf805bSHong Zhang   PetscTruth     flg;
130249b5e25fSSatish Balay 
130349b5e25fSSatish Balay   PetscFunctionBegin;
1304b3bf805bSHong Zhang   if (ll != rr){
1305b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1306b3bf805bSHong Zhang     if (!flg)
1307b3bf805bSHong Zhang       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1308b3bf805bSHong Zhang   }
1309b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
131049b5e25fSSatish Balay   ai  = a->i;
131149b5e25fSSatish Balay   aj  = a->j;
131249b5e25fSSatish Balay   aa  = a->a;
1313899cda47SBarry Smith   m   = A->rmap.N;
1314899cda47SBarry Smith   bs  = A->rmap.bs;
131549b5e25fSSatish Balay   mbs = a->mbs;
131649b5e25fSSatish Balay   bs2 = a->bs2;
131749b5e25fSSatish Balay 
13181ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
131949b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1320347d480fSBarry Smith   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
132149b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
132249b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
132349b5e25fSSatish Balay     li = l + i*bs;
132449b5e25fSSatish Balay     v  = aa + bs2*ai[i];
132549b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
132649b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13275e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
132849b5e25fSSatish Balay         x = ri[k];
132949b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
133049b5e25fSSatish Balay       }
133149b5e25fSSatish Balay     }
133249b5e25fSSatish Balay   }
13331ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1334efee365bSSatish Balay   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
133549b5e25fSSatish Balay   PetscFunctionReturn(0);
133649b5e25fSSatish Balay }
133749b5e25fSSatish Balay 
13384a2ae208SSatish Balay #undef __FUNCT__
13394a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1340dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
134149b5e25fSSatish Balay {
134249b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
134349b5e25fSSatish Balay 
134449b5e25fSSatish Balay   PetscFunctionBegin;
1345899cda47SBarry Smith   info->rows_global    = (double)A->rmap.N;
1346899cda47SBarry Smith   info->columns_global = (double)A->rmap.N;
1347899cda47SBarry Smith   info->rows_local     = (double)A->rmap.N;
1348899cda47SBarry Smith   info->columns_local  = (double)A->rmap.N;
134949b5e25fSSatish Balay   info->block_size     = a->bs2;
13506c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13516c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
135249b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
135349b5e25fSSatish Balay   info->assemblies   = A->num_ass;
135449b5e25fSSatish Balay   info->mallocs      = a->reallocs;
1355*7adad957SLisandro Dalcin   info->memory       = ((PetscObject)A)->mem;
135649b5e25fSSatish Balay   if (A->factor) {
135749b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
135849b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
135949b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
136049b5e25fSSatish Balay   } else {
136149b5e25fSSatish Balay     info->fill_ratio_given  = 0;
136249b5e25fSSatish Balay     info->fill_ratio_needed = 0;
136349b5e25fSSatish Balay     info->factor_mallocs    = 0;
136449b5e25fSSatish Balay   }
136549b5e25fSSatish Balay   PetscFunctionReturn(0);
136649b5e25fSSatish Balay }
136749b5e25fSSatish Balay 
136849b5e25fSSatish Balay 
13694a2ae208SSatish Balay #undef __FUNCT__
13704a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1371dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
137249b5e25fSSatish Balay {
137349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1374dfbe8321SBarry Smith   PetscErrorCode ierr;
137549b5e25fSSatish Balay 
137649b5e25fSSatish Balay   PetscFunctionBegin;
137749b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
137849b5e25fSSatish Balay   PetscFunctionReturn(0);
137949b5e25fSSatish Balay }
1380dc354874SHong Zhang 
13814a2ae208SSatish Balay #undef __FUNCT__
1382985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ"
1383985db425SBarry Smith /*
1384985db425SBarry Smith    This code does not work since it only checks the upper triangular part of
1385985db425SBarry Smith   the matrix. Hence it is not listed in the function table.
1386985db425SBarry Smith */
1387985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1388dc354874SHong Zhang {
1389dc354874SHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1390dfbe8321SBarry Smith   PetscErrorCode ierr;
139113f74950SBarry Smith   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1392c3fca9a7SHong Zhang   PetscReal      atmp;
1393273d9f13SBarry Smith   MatScalar      *aa;
1394985db425SBarry Smith   PetscScalar    *x;
139513f74950SBarry Smith   PetscInt       ncols,brow,bcol,krow,kcol;
1396dc354874SHong Zhang 
1397dc354874SHong Zhang   PetscFunctionBegin;
1398985db425SBarry Smith   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1399dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1400899cda47SBarry Smith   bs   = A->rmap.bs;
1401dc354874SHong Zhang   aa   = a->a;
1402dc354874SHong Zhang   ai   = a->i;
1403dc354874SHong Zhang   aj   = a->j;
140444117c81SHong Zhang   mbs = a->mbs;
1405dc354874SHong Zhang 
1406985db425SBarry Smith   ierr = VecSet(v,0.0);CHKERRQ(ierr);
14071ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1408dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1409899cda47SBarry Smith   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
141044117c81SHong Zhang   for (i=0; i<mbs; i++) {
1411d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1412d0f6400bSHong Zhang     brow  = bs*i;
141344117c81SHong Zhang     for (j=0; j<ncols; j++){
1414d0f6400bSHong Zhang       bcol = bs*(*aj);
141544117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1416d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
141744117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1418d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1419d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1420c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1421c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
142244117c81SHong Zhang         }
142344117c81SHong Zhang       }
1424d0f6400bSHong Zhang       aj++;
1425dc354874SHong Zhang     }
1426dc354874SHong Zhang   }
14271ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1428dc354874SHong Zhang   PetscFunctionReturn(0);
1429dc354874SHong Zhang }
1430