xref: /petsc/src/mat/impls/sbaij/seq/sbaij2.c (revision 3cb60589b191bc9e8fd8fa46cceaa471201c7ec2)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
249b5e25fSSatish Balay 
349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h"
449b5e25fSSatish Balay #include "src/inline/spops.h"
549b5e25fSSatish Balay #include "src/inline/ilu.h"
649b5e25fSSatish Balay #include "petscbt.h"
73a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h"
849b5e25fSSatish Balay 
94a2ae208SSatish Balay #undef __FUNCT__
104a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ"
1113f74950SBarry Smith PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
1249b5e25fSSatish Balay {
135eee224dSHong Zhang   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
146849ba73SBarry Smith   PetscErrorCode ierr;
15521d7252SBarry Smith   PetscInt       brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
16521d7252SBarry Smith   PetscBT        table,table0;
17d94109b8SHong Zhang 
18d94109b8SHong Zhang   PetscFunctionBegin;
19b3bf805bSHong Zhang   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20d94109b8SHong Zhang   mbs = a->mbs;
21d94109b8SHong Zhang   ai  = a->i;
22d94109b8SHong Zhang   aj  = a->j;
23899cda47SBarry Smith   bs  = A->rmap.bs;
24d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table);CHKERRQ(ierr);
2513f74950SBarry Smith   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
26899cda47SBarry Smith   ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
27d94109b8SHong Zhang   ierr = PetscBTCreate(mbs,table0);CHKERRQ(ierr);
28d94109b8SHong Zhang 
29d94109b8SHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
30d94109b8SHong Zhang     isz  = 0;
31d94109b8SHong Zhang     ierr = PetscBTMemzero(mbs,table);CHKERRQ(ierr);
32d94109b8SHong Zhang 
33d94109b8SHong Zhang     /* Extract the indices, assume there can be duplicate entries */
34d94109b8SHong Zhang     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
35d94109b8SHong Zhang     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
36d94109b8SHong Zhang 
37d94109b8SHong Zhang     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
38dbe03f88SHong Zhang     bcol_max = 0;
39d94109b8SHong Zhang     for (j=0; j<n ; ++j){
40d94109b8SHong Zhang       brow = idx[j]/bs; /* convert the indices into block indices */
41d94109b8SHong Zhang       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42dbe03f88SHong Zhang       if(!PetscBTLookupSet(table,brow)) {
43dbe03f88SHong Zhang         nidx[isz++] = brow;
44dbe03f88SHong Zhang         if (bcol_max < brow) bcol_max = brow;
45dbe03f88SHong Zhang       }
46d94109b8SHong Zhang     }
47d94109b8SHong Zhang     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
48d94109b8SHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
49d94109b8SHong Zhang 
50d94109b8SHong Zhang     k = 0;
51d94109b8SHong Zhang     for (j=0; j<ov; j++){ /* for each overlap */
52d94109b8SHong Zhang       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53d94109b8SHong Zhang       ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
54efee365bSSatish Balay       for (l=k; l<isz; l++) { ierr = PetscBTSet(table0,nidx[l]);CHKERRQ(ierr); }
55d94109b8SHong Zhang 
56d94109b8SHong Zhang       n = isz;  /* length of the updated is[i] */
57d94109b8SHong Zhang       for (brow=0; brow<mbs; brow++){
58d94109b8SHong Zhang         start = ai[brow]; end   = ai[brow+1];
59d94109b8SHong Zhang         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60d94109b8SHong Zhang           for (l = start; l<end ; l++){
61d94109b8SHong Zhang             bcol = aj[l];
62d94109b8SHong Zhang             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
63d94109b8SHong Zhang           }
64d94109b8SHong Zhang           k++;
65d94109b8SHong Zhang           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66d94109b8SHong Zhang         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67d94109b8SHong Zhang           for (l = start; l<end ; l++){
68d94109b8SHong Zhang             bcol = aj[l];
69dbe03f88SHong Zhang             if (bcol > bcol_max) break;
70d94109b8SHong Zhang             if (PetscBTLookup(table0,bcol)){
71d94109b8SHong Zhang               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
72d94109b8SHong Zhang               break; /* for l = start; l<end ; l++) */
73d94109b8SHong Zhang             }
74d94109b8SHong Zhang           }
75d94109b8SHong Zhang         }
76d94109b8SHong Zhang       }
77d94109b8SHong Zhang     } /* for each overlap */
78d94109b8SHong Zhang 
79d94109b8SHong Zhang     /* expand the Index Set */
80d94109b8SHong Zhang     for (j=0; j<isz; j++) {
81d94109b8SHong Zhang       for (k=0; k<bs; k++)
82d94109b8SHong Zhang         nidx2[j*bs+k] = nidx[j]*bs+k;
83d94109b8SHong Zhang     }
84d94109b8SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
85d94109b8SHong Zhang   }
86d94109b8SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
87d94109b8SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
88d94109b8SHong Zhang   ierr = PetscFree(nidx2);CHKERRQ(ierr);
89d94109b8SHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
905eee224dSHong Zhang   PetscFunctionReturn(0);
9149b5e25fSSatish Balay }
9249b5e25fSSatish Balay 
934a2ae208SSatish Balay #undef __FUNCT__
944a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private"
9513f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
9649b5e25fSSatish Balay {
9749b5e25fSSatish Balay   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
986849ba73SBarry Smith   PetscErrorCode ierr;
9913f74950SBarry Smith   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
10013f74950SBarry Smith   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
101899cda47SBarry Smith   PetscInt       *irow,nrows,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
10213f74950SBarry Smith   PetscInt       *aj = a->j,*ai = a->i;
10349b5e25fSSatish Balay   MatScalar      *mat_a;
10449b5e25fSSatish Balay   Mat            C;
10549b5e25fSSatish Balay   PetscTruth     flag;
10649b5e25fSSatish Balay 
10749b5e25fSSatish Balay   PetscFunctionBegin;
108e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
10949b5e25fSSatish Balay   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
110347d480fSBarry Smith   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
11149b5e25fSSatish Balay 
11249b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
11349b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
11449b5e25fSSatish Balay 
11513f74950SBarry Smith   ierr  = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
11649b5e25fSSatish Balay   ssmap = smap;
11713f74950SBarry Smith   ierr  = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
11813f74950SBarry Smith   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
11949b5e25fSSatish Balay   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
12049b5e25fSSatish Balay   /* determine lens of each row */
12149b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
12249b5e25fSSatish Balay     kstart  = ai[irow[i]];
12349b5e25fSSatish Balay     kend    = kstart + a->ilen[irow[i]];
12449b5e25fSSatish Balay     lens[i] = 0;
12549b5e25fSSatish Balay       for (k=kstart; k<kend; k++) {
12649b5e25fSSatish Balay         if (ssmap[aj[k]]) {
12749b5e25fSSatish Balay           lens[i]++;
12849b5e25fSSatish Balay         }
12949b5e25fSSatish Balay       }
13049b5e25fSSatish Balay     }
13149b5e25fSSatish Balay   /* Create and fill new matrix */
13249b5e25fSSatish Balay   if (scall == MAT_REUSE_MATRIX) {
13349b5e25fSSatish Balay     c = (Mat_SeqSBAIJ *)((*B)->data);
13449b5e25fSSatish Balay 
135899cda47SBarry Smith     if (c->mbs!=nrows || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
13613f74950SBarry Smith     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
137abc0a331SBarry Smith     if (!flag) {
138347d480fSBarry Smith       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
13949b5e25fSSatish Balay     }
14013f74950SBarry Smith     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
14149b5e25fSSatish Balay     C = *B;
14249b5e25fSSatish Balay   } else {
143f69a0ea3SMatthew Knepley     ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
144f69a0ea3SMatthew Knepley     ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
145e2d9671bSKris Buschelman     ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
146ab93d7beSBarry Smith     ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr);
14749b5e25fSSatish Balay   }
14849b5e25fSSatish Balay   c = (Mat_SeqSBAIJ *)(C->data);
14949b5e25fSSatish Balay   for (i=0; i<nrows; i++) {
15049b5e25fSSatish Balay     row    = irow[i];
15149b5e25fSSatish Balay     kstart = ai[row];
15249b5e25fSSatish Balay     kend   = kstart + a->ilen[row];
15349b5e25fSSatish Balay     mat_i  = c->i[i];
15449b5e25fSSatish Balay     mat_j  = c->j + mat_i;
15549b5e25fSSatish Balay     mat_a  = c->a + mat_i*bs2;
15649b5e25fSSatish Balay     mat_ilen = c->ilen + i;
15749b5e25fSSatish Balay     for (k=kstart; k<kend; k++) {
15849b5e25fSSatish Balay       if ((tcol=ssmap[a->j[k]])) {
15949b5e25fSSatish Balay         *mat_j++ = tcol - 1;
16049b5e25fSSatish Balay         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
16149b5e25fSSatish Balay         mat_a   += bs2;
16249b5e25fSSatish Balay         (*mat_ilen)++;
16349b5e25fSSatish Balay       }
16449b5e25fSSatish Balay     }
16549b5e25fSSatish Balay   }
16649b5e25fSSatish Balay 
16749b5e25fSSatish Balay   /* Free work space */
16849b5e25fSSatish Balay   ierr = PetscFree(smap);CHKERRQ(ierr);
16949b5e25fSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
17049b5e25fSSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17149b5e25fSSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249b5e25fSSatish Balay 
17349b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
17449b5e25fSSatish Balay   *B = C;
17549b5e25fSSatish Balay   PetscFunctionReturn(0);
17649b5e25fSSatish Balay }
17749b5e25fSSatish Balay 
1784a2ae208SSatish Balay #undef __FUNCT__
1794a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ"
18013f74950SBarry Smith PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
18149b5e25fSSatish Balay {
18249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
18349b5e25fSSatish Balay   IS             is1;
1846849ba73SBarry Smith   PetscErrorCode ierr;
185899cda47SBarry Smith   PetscInt       *vary,*iary,*irow,nrows,i,bs=A->rmap.bs,count;
18649b5e25fSSatish Balay 
18749b5e25fSSatish Balay   PetscFunctionBegin;
188e005ede5SBarry Smith   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
18949b5e25fSSatish Balay 
19049b5e25fSSatish Balay   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
19149b5e25fSSatish Balay   ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
19249b5e25fSSatish Balay 
19349b5e25fSSatish Balay   /* Verify if the indices corespond to each element in a block
19449b5e25fSSatish Balay    and form the IS with compressed IS */
19513f74950SBarry Smith   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
19649b5e25fSSatish Balay   iary = vary + a->mbs;
19713f74950SBarry Smith   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
19849b5e25fSSatish Balay   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
19949b5e25fSSatish Balay 
20049b5e25fSSatish Balay   count = 0;
20149b5e25fSSatish Balay   for (i=0; i<a->mbs; i++) {
202e005ede5SBarry Smith     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
20349b5e25fSSatish Balay     if (vary[i]==bs) iary[count++] = i;
20449b5e25fSSatish Balay   }
20549b5e25fSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
20649b5e25fSSatish Balay 
20749b5e25fSSatish Balay   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
20849b5e25fSSatish Balay   ierr = PetscFree(vary);CHKERRQ(ierr);
20949b5e25fSSatish Balay 
21049b5e25fSSatish Balay   ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr);
21149b5e25fSSatish Balay   ISDestroy(is1);
21249b5e25fSSatish Balay   PetscFunctionReturn(0);
21349b5e25fSSatish Balay }
21449b5e25fSSatish Balay 
2154a2ae208SSatish Balay #undef __FUNCT__
2164a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ"
21713f74950SBarry Smith PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
21849b5e25fSSatish Balay {
2196849ba73SBarry Smith   PetscErrorCode ierr;
22013f74950SBarry Smith   PetscInt       i;
22149b5e25fSSatish Balay 
22249b5e25fSSatish Balay   PetscFunctionBegin;
22349b5e25fSSatish Balay   if (scall == MAT_INITIAL_MATRIX) {
22482502324SSatish Balay     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
22549b5e25fSSatish Balay   }
22649b5e25fSSatish Balay 
22749b5e25fSSatish Balay   for (i=0; i<n; i++) {
22849b5e25fSSatish Balay     ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
22949b5e25fSSatish Balay   }
23049b5e25fSSatish Balay   PetscFunctionReturn(0);
23149b5e25fSSatish Balay }
23249b5e25fSSatish Balay 
23349b5e25fSSatish Balay /* -------------------------------------------------------*/
23449b5e25fSSatish Balay /* Should check that shapes of vectors and matrices match */
23549b5e25fSSatish Balay /* -------------------------------------------------------*/
236d9eff348SSatish Balay #include "petscblaslapack.h"
23749b5e25fSSatish Balay 
2384a2ae208SSatish Balay #undef __FUNCT__
2394a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_1"
240dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
24149b5e25fSSatish Balay {
24249b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
24387828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,zero=0.0;
24449b5e25fSSatish Balay   MatScalar      *v;
2456849ba73SBarry Smith   PetscErrorCode ierr;
24613f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
24749b5e25fSSatish Balay 
24849b5e25fSSatish Balay   PetscFunctionBegin;
2492dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
25249b5e25fSSatish Balay 
25349b5e25fSSatish Balay   v  = a->a;
25449b5e25fSSatish Balay   xb = x;
25549b5e25fSSatish Balay 
25649b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
25749b5e25fSSatish Balay     n    = ai[1] - ai[0];  /* length of i_th row of A */
258*3cb60589SBarry Smith     x1   = *xb++;
259*3cb60589SBarry Smith     ib   = aj + *ai++;
260831a3094SHong Zhang     jmin = 0;
261*3cb60589SBarry Smith     /* if we ALWAYS required a diagonal entry then could remove this if test */
262*3cb60589SBarry Smith     /* should we use a tmp to hold the accumulated z[i] */
263831a3094SHong Zhang     if (*ib == i) {      /* (diag of A)*x */
264831a3094SHong Zhang       z[i] += *v++ * x[*ib++];
265831a3094SHong Zhang       jmin++;
266831a3094SHong Zhang     }
267831a3094SHong Zhang     for (j=jmin; j<n; j++) {
26849b5e25fSSatish Balay       cval    = *ib;
26949b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
27049b5e25fSSatish Balay       z[i]    += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
27149b5e25fSSatish Balay     }
27249b5e25fSSatish Balay   }
27349b5e25fSSatish Balay 
2741ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2751ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
276899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);  /* nz = (nz+m)/2 */
27749b5e25fSSatish Balay   PetscFunctionReturn(0);
27849b5e25fSSatish Balay }
27949b5e25fSSatish Balay 
2804a2ae208SSatish Balay #undef __FUNCT__
2814a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_2"
282dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
28349b5e25fSSatish Balay {
28449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
28587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
28649b5e25fSSatish Balay   MatScalar      *v;
2876849ba73SBarry Smith   PetscErrorCode ierr;
28813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
28949b5e25fSSatish Balay 
29049b5e25fSSatish Balay   PetscFunctionBegin;
2912dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
2921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
29449b5e25fSSatish Balay 
29549b5e25fSSatish Balay   v     = a->a;
29649b5e25fSSatish Balay   xb = x;
29749b5e25fSSatish Balay 
29849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
29949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
30049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
30149b5e25fSSatish Balay     ib = aj + *ai;
302831a3094SHong Zhang     jmin = 0;
3037fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
30449b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
30549b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
306831a3094SHong Zhang       v += 4; jmin++;
3077fbae186SHong Zhang     }
308831a3094SHong Zhang     for (j=jmin; j<n; j++) {
30949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
31049b5e25fSSatish Balay       cval       = ib[j]*2;
31149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
31249b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
31349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
31449b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
31549b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
31649b5e25fSSatish Balay       v  += 4;
31749b5e25fSSatish Balay     }
31849b5e25fSSatish Balay     xb +=2; ai++;
31949b5e25fSSatish Balay   }
32049b5e25fSSatish Balay 
3211ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3221ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
323899cda47SBarry Smith   ierr = PetscLogFlops(8*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
32449b5e25fSSatish Balay   PetscFunctionReturn(0);
32549b5e25fSSatish Balay }
32649b5e25fSSatish Balay 
3274a2ae208SSatish Balay #undef __FUNCT__
3284a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_3"
329dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
33049b5e25fSSatish Balay {
33149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
33287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
33349b5e25fSSatish Balay   MatScalar      *v;
3346849ba73SBarry Smith   PetscErrorCode ierr;
33513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
33649b5e25fSSatish Balay 
33749b5e25fSSatish Balay   PetscFunctionBegin;
3382dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3391ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3401ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
34149b5e25fSSatish Balay 
34249b5e25fSSatish Balay   v    = a->a;
34349b5e25fSSatish Balay   xb   = x;
34449b5e25fSSatish Balay 
34549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
34649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
34749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
34849b5e25fSSatish Balay     ib = aj + *ai;
349831a3094SHong Zhang     jmin = 0;
3507fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
35149b5e25fSSatish Balay       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
35249b5e25fSSatish Balay       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
35349b5e25fSSatish Balay       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
354831a3094SHong Zhang       v += 9; jmin++;
3557fbae186SHong Zhang     }
356831a3094SHong Zhang     for (j=jmin; j<n; j++) {
35749b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
35849b5e25fSSatish Balay       cval       = ib[j]*3;
35949b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
36049b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
36149b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
36249b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
36349b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
36449b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
36549b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
36649b5e25fSSatish Balay       v  += 9;
36749b5e25fSSatish Balay     }
36849b5e25fSSatish Balay     xb +=3; ai++;
36949b5e25fSSatish Balay   }
37049b5e25fSSatish Balay 
3711ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3721ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
373899cda47SBarry Smith   ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
37449b5e25fSSatish Balay   PetscFunctionReturn(0);
37549b5e25fSSatish Balay }
37649b5e25fSSatish Balay 
3774a2ae208SSatish Balay #undef __FUNCT__
3784a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_4"
379dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
38049b5e25fSSatish Balay {
38149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
38349b5e25fSSatish Balay   MatScalar      *v;
3846849ba73SBarry Smith   PetscErrorCode ierr;
38513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
38649b5e25fSSatish Balay 
38749b5e25fSSatish Balay   PetscFunctionBegin;
3882dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
3891ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
39149b5e25fSSatish Balay 
39249b5e25fSSatish Balay   v     = a->a;
39349b5e25fSSatish Balay   xb = x;
39449b5e25fSSatish Balay 
39549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
39649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
39749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
39849b5e25fSSatish Balay     ib = aj + *ai;
399831a3094SHong Zhang     jmin = 0;
4007fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
40149b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
40249b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
40349b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
40449b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
405831a3094SHong Zhang       v += 16; jmin++;
4067fbae186SHong Zhang     }
407831a3094SHong Zhang     for (j=jmin; j<n; j++) {
40849b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
40949b5e25fSSatish Balay       cval       = ib[j]*4;
41049b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
41149b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
41249b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
41349b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
41449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
41549b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
41649b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
41749b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
41849b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
41949b5e25fSSatish Balay       v  += 16;
42049b5e25fSSatish Balay     }
42149b5e25fSSatish Balay     xb +=4; ai++;
42249b5e25fSSatish Balay   }
42349b5e25fSSatish Balay 
4241ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4251ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
426899cda47SBarry Smith   ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
42749b5e25fSSatish Balay   PetscFunctionReturn(0);
42849b5e25fSSatish Balay }
42949b5e25fSSatish Balay 
4304a2ae208SSatish Balay #undef __FUNCT__
4314a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_5"
432dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
43349b5e25fSSatish Balay {
43449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
43587828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
43649b5e25fSSatish Balay   MatScalar      *v;
4376849ba73SBarry Smith   PetscErrorCode ierr;
43813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
43949b5e25fSSatish Balay 
44049b5e25fSSatish Balay   PetscFunctionBegin;
4412dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4421ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
44449b5e25fSSatish Balay 
44549b5e25fSSatish Balay   v     = a->a;
44649b5e25fSSatish Balay   xb = x;
44749b5e25fSSatish Balay 
44849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
44949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
45049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
45149b5e25fSSatish Balay     ib = aj + *ai;
452831a3094SHong Zhang     jmin = 0;
4537fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
45449b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
45549b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
45649b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
45749b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
45849b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
459831a3094SHong Zhang       v += 25; jmin++;
4607fbae186SHong Zhang     }
461831a3094SHong Zhang     for (j=jmin; j<n; j++) {
46249b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
46349b5e25fSSatish Balay       cval       = ib[j]*5;
46449b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
46549b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
46649b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
46749b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
46849b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
46949b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
47049b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
47149b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
47249b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
47349b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
47449b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
47549b5e25fSSatish Balay       v  += 25;
47649b5e25fSSatish Balay     }
47749b5e25fSSatish Balay     xb +=5; ai++;
47849b5e25fSSatish Balay   }
47949b5e25fSSatish Balay 
4801ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4811ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
482899cda47SBarry Smith   ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
48349b5e25fSSatish Balay   PetscFunctionReturn(0);
48449b5e25fSSatish Balay }
48549b5e25fSSatish Balay 
48649b5e25fSSatish Balay 
4874a2ae208SSatish Balay #undef __FUNCT__
4884a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_6"
489dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
49049b5e25fSSatish Balay {
49149b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
49287828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
49349b5e25fSSatish Balay   MatScalar      *v;
4946849ba73SBarry Smith   PetscErrorCode ierr;
49513f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
49649b5e25fSSatish Balay 
49749b5e25fSSatish Balay   PetscFunctionBegin;
4982dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
4991ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
50149b5e25fSSatish Balay 
50249b5e25fSSatish Balay   v     = a->a;
50349b5e25fSSatish Balay   xb = x;
50449b5e25fSSatish Balay 
50549b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
50649b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
50749b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
50849b5e25fSSatish Balay     ib = aj + *ai;
509831a3094SHong Zhang     jmin = 0;
5107fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
51149b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
51249b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
51349b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
51449b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
51549b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
51649b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
517831a3094SHong Zhang       v += 36; jmin++;
5187fbae186SHong Zhang     }
519831a3094SHong Zhang     for (j=jmin; j<n; j++) {
52049b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
52149b5e25fSSatish Balay       cval       = ib[j]*6;
52249b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
52349b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
52449b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
52549b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
52649b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
52749b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
52849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
52949b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
53049b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
53149b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
53249b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
53349b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
53449b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
53549b5e25fSSatish Balay       v  += 36;
53649b5e25fSSatish Balay     }
53749b5e25fSSatish Balay     xb +=6; ai++;
53849b5e25fSSatish Balay   }
53949b5e25fSSatish Balay 
5401ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5411ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
542899cda47SBarry Smith   ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
54349b5e25fSSatish Balay   PetscFunctionReturn(0);
54449b5e25fSSatish Balay }
5454a2ae208SSatish Balay #undef __FUNCT__
5464a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_7"
547dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
54849b5e25fSSatish Balay {
54949b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
55087828ca2SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
55149b5e25fSSatish Balay   MatScalar      *v;
5526849ba73SBarry Smith   PetscErrorCode ierr;
55313f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
55449b5e25fSSatish Balay 
55549b5e25fSSatish Balay   PetscFunctionBegin;
5562dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
5571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
55949b5e25fSSatish Balay 
56049b5e25fSSatish Balay   v     = a->a;
56149b5e25fSSatish Balay   xb = x;
56249b5e25fSSatish Balay 
56349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
56449b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
56549b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
56649b5e25fSSatish Balay     ib = aj + *ai;
567831a3094SHong Zhang     jmin = 0;
5687fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
56949b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
57049b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
57149b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
57249b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
57349b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
57449b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
57549b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
576831a3094SHong Zhang       v += 49; jmin++;
5777fbae186SHong Zhang     }
578831a3094SHong Zhang     for (j=jmin; j<n; j++) {
57949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
58049b5e25fSSatish Balay       cval       = ib[j]*7;
58149b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
58249b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
58349b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
58449b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
58549b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
58649b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
58749b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
58849b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
58949b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
59049b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
59149b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
59249b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
59349b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
59449b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
59549b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
59649b5e25fSSatish Balay       v  += 49;
59749b5e25fSSatish Balay     }
59849b5e25fSSatish Balay     xb +=7; ai++;
59949b5e25fSSatish Balay   }
6001ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6011ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
602899cda47SBarry Smith   ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.N) - A->rmap.N);CHKERRQ(ierr);
60349b5e25fSSatish Balay   PetscFunctionReturn(0);
60449b5e25fSSatish Balay }
60549b5e25fSSatish Balay 
60649b5e25fSSatish Balay /*
60749b5e25fSSatish Balay     This will not work with MatScalar == float because it calls the BLAS
60849b5e25fSSatish Balay */
6094a2ae208SSatish Balay #undef __FUNCT__
6104a2ae208SSatish Balay #define __FUNCT__ "MatMult_SeqSBAIJ_N"
611dfbe8321SBarry Smith PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
61249b5e25fSSatish Balay {
61349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
61487828ca2SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
6150b60a74dSHong Zhang   MatScalar      *v;
616dfbe8321SBarry Smith   PetscErrorCode ierr;
617899cda47SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
61849b5e25fSSatish Balay 
61949b5e25fSSatish Balay   PetscFunctionBegin;
6202dcb1b2aSMatthew Knepley   ierr = VecSet(zz,zero);CHKERRQ(ierr);
6211ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
6221ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
62349b5e25fSSatish Balay 
62449b5e25fSSatish Balay   aj   = a->j;
62549b5e25fSSatish Balay   v    = a->a;
62649b5e25fSSatish Balay   ii   = a->i;
62749b5e25fSSatish Balay 
62849b5e25fSSatish Balay   if (!a->mult_work) {
629899cda47SBarry Smith     ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
63049b5e25fSSatish Balay   }
63149b5e25fSSatish Balay   work = a->mult_work;
63249b5e25fSSatish Balay 
63349b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
63449b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
63549b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
63649b5e25fSSatish Balay 
63749b5e25fSSatish Balay     /* upper triangular part */
63849b5e25fSSatish Balay     for (j=0; j<n; j++) {
63949b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
64049b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
64149b5e25fSSatish Balay       workt += bs;
64249b5e25fSSatish Balay     }
64349b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
64449b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
64549b5e25fSSatish Balay 
64649b5e25fSSatish Balay     /* strict lower triangular part */
647831a3094SHong Zhang     idx = aj+ii[0];
648831a3094SHong Zhang     if (*idx == i){
64996b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
650831a3094SHong Zhang     }
65196b9376eSHong Zhang 
65249b5e25fSSatish Balay     if (ncols > 0){
65349b5e25fSSatish Balay       workt = work;
65487828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
655831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
656831a3094SHong Zhang       for (j=0; j<n; j++) {
657831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
65849b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
65949b5e25fSSatish Balay         workt += bs;
66049b5e25fSSatish Balay       }
66149b5e25fSSatish Balay     }
66249b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
66349b5e25fSSatish Balay   }
66449b5e25fSSatish Balay 
6651ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6661ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
667899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.N)*bs2 - A->rmap.N);CHKERRQ(ierr);
66849b5e25fSSatish Balay   PetscFunctionReturn(0);
66949b5e25fSSatish Balay }
67049b5e25fSSatish Balay 
6716a65c766SHong Zhang extern PetscErrorCode VecCopy_Seq(Vec,Vec);
6724a2ae208SSatish Balay #undef __FUNCT__
6734a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1"
674dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
67549b5e25fSSatish Balay {
67649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
677bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1;
67849b5e25fSSatish Balay   MatScalar      *v;
6796849ba73SBarry Smith   PetscErrorCode ierr;
68013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
68149b5e25fSSatish Balay 
68249b5e25fSSatish Balay   PetscFunctionBegin;
6836a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
6841ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
68649b5e25fSSatish Balay   v  = a->a;
68749b5e25fSSatish Balay   xb = x;
68849b5e25fSSatish Balay 
68949b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
69049b5e25fSSatish Balay     n  = ai[1] - ai[0];  /* length of i_th row of A */
69149b5e25fSSatish Balay     x1 = xb[0];
69249b5e25fSSatish Balay     ib = aj + *ai;
693831a3094SHong Zhang     jmin = 0;
694831a3094SHong Zhang     if (*ib == i) {            /* (diag of A)*x */
695831a3094SHong Zhang       z[i] += *v++ * x[*ib++]; jmin++;
696831a3094SHong Zhang     }
697831a3094SHong Zhang     for (j=jmin; j<n; j++) {
69849b5e25fSSatish Balay       cval    = *ib;
69949b5e25fSSatish Balay       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
70049b5e25fSSatish Balay       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
70149b5e25fSSatish Balay     }
70249b5e25fSSatish Balay     xb++; ai++;
70349b5e25fSSatish Balay   }
70449b5e25fSSatish Balay 
7051ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
706bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
70749b5e25fSSatish Balay 
708899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
70949b5e25fSSatish Balay   PetscFunctionReturn(0);
71049b5e25fSSatish Balay }
71149b5e25fSSatish Balay 
7124a2ae208SSatish Balay #undef __FUNCT__
7134a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2"
714dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
71549b5e25fSSatish Balay {
71649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
717bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2;
71849b5e25fSSatish Balay   MatScalar      *v;
7196849ba73SBarry Smith   PetscErrorCode ierr;
72013f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
72149b5e25fSSatish Balay 
72249b5e25fSSatish Balay   PetscFunctionBegin;
7236a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7241ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7251ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
72649b5e25fSSatish Balay 
72749b5e25fSSatish Balay   v  = a->a;
72849b5e25fSSatish Balay   xb = x;
72949b5e25fSSatish Balay 
73049b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
73149b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
73249b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1];
73349b5e25fSSatish Balay     ib = aj + *ai;
734831a3094SHong Zhang     jmin = 0;
7357fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
73649b5e25fSSatish Balay       z[2*i]   += v[0]*x1 + v[2]*x2;
73749b5e25fSSatish Balay       z[2*i+1] += v[2]*x1 + v[3]*x2;
738831a3094SHong Zhang       v += 4; jmin++;
7397fbae186SHong Zhang     }
740831a3094SHong Zhang     for (j=jmin; j<n; j++) {
74149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
74249b5e25fSSatish Balay       cval       = ib[j]*2;
74349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2;
74449b5e25fSSatish Balay       z[cval+1]   += v[2]*x1 + v[3]*x2;
74549b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
74649b5e25fSSatish Balay       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
74749b5e25fSSatish Balay       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
74849b5e25fSSatish Balay       v  += 4;
74949b5e25fSSatish Balay     }
75049b5e25fSSatish Balay     xb +=2; ai++;
75149b5e25fSSatish Balay   }
7521ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
753bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
75449b5e25fSSatish Balay 
755899cda47SBarry Smith   ierr = PetscLogFlops(4*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
75649b5e25fSSatish Balay   PetscFunctionReturn(0);
75749b5e25fSSatish Balay }
75849b5e25fSSatish Balay 
7594a2ae208SSatish Balay #undef __FUNCT__
7604a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3"
761dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
76249b5e25fSSatish Balay {
76349b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
764bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3;
76549b5e25fSSatish Balay   MatScalar      *v;
7666849ba73SBarry Smith   PetscErrorCode ierr;
76713f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
76849b5e25fSSatish Balay 
76949b5e25fSSatish Balay   PetscFunctionBegin;
7706a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
7711ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7721ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
77349b5e25fSSatish Balay 
77449b5e25fSSatish Balay   v     = a->a;
77549b5e25fSSatish Balay   xb = x;
77649b5e25fSSatish Balay 
77749b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
77849b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
77949b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
78049b5e25fSSatish Balay     ib = aj + *ai;
781831a3094SHong Zhang     jmin = 0;
7827fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
78349b5e25fSSatish Balay      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
78449b5e25fSSatish Balay      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
78549b5e25fSSatish Balay      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
786831a3094SHong Zhang      v += 9; jmin++;
7877fbae186SHong Zhang     }
788831a3094SHong Zhang     for (j=jmin; j<n; j++) {
78949b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
79049b5e25fSSatish Balay       cval       = ib[j]*3;
79149b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
79249b5e25fSSatish Balay       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
79349b5e25fSSatish Balay       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
79449b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
79549b5e25fSSatish Balay       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
79649b5e25fSSatish Balay       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
79749b5e25fSSatish Balay       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
79849b5e25fSSatish Balay       v  += 9;
79949b5e25fSSatish Balay     }
80049b5e25fSSatish Balay     xb +=3; ai++;
80149b5e25fSSatish Balay   }
80249b5e25fSSatish Balay 
8031ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
80549b5e25fSSatish Balay 
806899cda47SBarry Smith   ierr = PetscLogFlops(18*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
80749b5e25fSSatish Balay   PetscFunctionReturn(0);
80849b5e25fSSatish Balay }
80949b5e25fSSatish Balay 
8104a2ae208SSatish Balay #undef __FUNCT__
8114a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4"
812dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
81349b5e25fSSatish Balay {
81449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
815bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
81649b5e25fSSatish Balay   MatScalar      *v;
8176849ba73SBarry Smith   PetscErrorCode ierr;
81813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
81949b5e25fSSatish Balay 
82049b5e25fSSatish Balay   PetscFunctionBegin;
8216a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8221ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8231ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
82449b5e25fSSatish Balay 
82549b5e25fSSatish Balay   v     = a->a;
82649b5e25fSSatish Balay   xb = x;
82749b5e25fSSatish Balay 
82849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
82949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
83049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
83149b5e25fSSatish Balay     ib = aj + *ai;
832831a3094SHong Zhang     jmin = 0;
8337fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
83449b5e25fSSatish Balay       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
83549b5e25fSSatish Balay       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
83649b5e25fSSatish Balay       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
83749b5e25fSSatish Balay       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
838831a3094SHong Zhang       v += 16; jmin++;
8397fbae186SHong Zhang     }
840831a3094SHong Zhang     for (j=jmin; j<n; j++) {
84149b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
84249b5e25fSSatish Balay       cval       = ib[j]*4;
84349b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
84449b5e25fSSatish Balay       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
84549b5e25fSSatish Balay       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
84649b5e25fSSatish Balay       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
84749b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
84849b5e25fSSatish Balay       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
84949b5e25fSSatish Balay       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
85049b5e25fSSatish Balay       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
85149b5e25fSSatish Balay       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
85249b5e25fSSatish Balay       v  += 16;
85349b5e25fSSatish Balay     }
85449b5e25fSSatish Balay     xb +=4; ai++;
85549b5e25fSSatish Balay   }
85649b5e25fSSatish Balay 
8571ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
858bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
85949b5e25fSSatish Balay 
860899cda47SBarry Smith   ierr = PetscLogFlops(32*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
86149b5e25fSSatish Balay   PetscFunctionReturn(0);
86249b5e25fSSatish Balay }
86349b5e25fSSatish Balay 
8644a2ae208SSatish Balay #undef __FUNCT__
8654a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5"
866dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
86749b5e25fSSatish Balay {
86849b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
869bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
87049b5e25fSSatish Balay   MatScalar      *v;
8716849ba73SBarry Smith   PetscErrorCode ierr;
87213f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
87349b5e25fSSatish Balay 
87449b5e25fSSatish Balay   PetscFunctionBegin;
8756a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
8761ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8771ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
87849b5e25fSSatish Balay 
87949b5e25fSSatish Balay   v     = a->a;
88049b5e25fSSatish Balay   xb = x;
88149b5e25fSSatish Balay 
88249b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
88349b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
88449b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
88549b5e25fSSatish Balay     ib = aj + *ai;
886831a3094SHong Zhang     jmin = 0;
8877fbae186SHong Zhang     if (*ib == i){      /* (diag of A)*x */
88849b5e25fSSatish Balay       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
88949b5e25fSSatish Balay       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
89049b5e25fSSatish Balay       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
89149b5e25fSSatish Balay       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
89249b5e25fSSatish Balay       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
893831a3094SHong Zhang       v += 25; jmin++;
8947fbae186SHong Zhang     }
895831a3094SHong Zhang     for (j=jmin; j<n; j++) {
89649b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
89749b5e25fSSatish Balay       cval       = ib[j]*5;
89849b5e25fSSatish Balay       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
89949b5e25fSSatish Balay       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
90049b5e25fSSatish Balay       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
90149b5e25fSSatish Balay       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
90249b5e25fSSatish Balay       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
90349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
90449b5e25fSSatish Balay       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
90549b5e25fSSatish Balay       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
90649b5e25fSSatish Balay       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
90749b5e25fSSatish Balay       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
90849b5e25fSSatish Balay       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
90949b5e25fSSatish Balay       v  += 25;
91049b5e25fSSatish Balay     }
91149b5e25fSSatish Balay     xb +=5; ai++;
91249b5e25fSSatish Balay   }
91349b5e25fSSatish Balay 
9141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
915bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
91649b5e25fSSatish Balay 
917899cda47SBarry Smith   ierr = PetscLogFlops(50*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
91849b5e25fSSatish Balay   PetscFunctionReturn(0);
91949b5e25fSSatish Balay }
9204a2ae208SSatish Balay #undef __FUNCT__
9214a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6"
922dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
92349b5e25fSSatish Balay {
92449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
925bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
92649b5e25fSSatish Balay   MatScalar      *v;
9276849ba73SBarry Smith   PetscErrorCode ierr;
92813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
92949b5e25fSSatish Balay 
93049b5e25fSSatish Balay   PetscFunctionBegin;
9316a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9321ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
93449b5e25fSSatish Balay 
93549b5e25fSSatish Balay   v     = a->a;
93649b5e25fSSatish Balay   xb = x;
93749b5e25fSSatish Balay 
93849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
93949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
94049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
94149b5e25fSSatish Balay     ib = aj + *ai;
942831a3094SHong Zhang     jmin = 0;
9437fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
94449b5e25fSSatish Balay       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
94549b5e25fSSatish Balay       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
94649b5e25fSSatish Balay       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
94749b5e25fSSatish Balay       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
94849b5e25fSSatish Balay       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
94949b5e25fSSatish Balay       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
950831a3094SHong Zhang       v += 36; jmin++;
9517fbae186SHong Zhang     }
952831a3094SHong Zhang     for (j=jmin; j<n; j++) {
95349b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
95449b5e25fSSatish Balay       cval       = ib[j]*6;
95549b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
95649b5e25fSSatish Balay       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
95749b5e25fSSatish Balay       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
95849b5e25fSSatish Balay       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
95949b5e25fSSatish Balay       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
96049b5e25fSSatish Balay       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
96149b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
96249b5e25fSSatish Balay       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
96349b5e25fSSatish Balay       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
96449b5e25fSSatish Balay       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
96549b5e25fSSatish Balay       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
96649b5e25fSSatish Balay       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
96749b5e25fSSatish Balay       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
96849b5e25fSSatish Balay       v  += 36;
96949b5e25fSSatish Balay     }
97049b5e25fSSatish Balay     xb +=6; ai++;
97149b5e25fSSatish Balay   }
97249b5e25fSSatish Balay 
9731ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
974bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
97549b5e25fSSatish Balay 
976899cda47SBarry Smith   ierr = PetscLogFlops(72*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
97749b5e25fSSatish Balay   PetscFunctionReturn(0);
97849b5e25fSSatish Balay }
97949b5e25fSSatish Balay 
9804a2ae208SSatish Balay #undef __FUNCT__
9814a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7"
982dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
98349b5e25fSSatish Balay {
98449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
985bba805e6SBarry Smith   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
98649b5e25fSSatish Balay   MatScalar      *v;
9876849ba73SBarry Smith   PetscErrorCode ierr;
98813f74950SBarry Smith   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
98949b5e25fSSatish Balay 
99049b5e25fSSatish Balay   PetscFunctionBegin;
9916a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
9921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
99449b5e25fSSatish Balay 
99549b5e25fSSatish Balay   v     = a->a;
99649b5e25fSSatish Balay   xb = x;
99749b5e25fSSatish Balay 
99849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
99949b5e25fSSatish Balay     n  = ai[1] - ai[0]; /* length of i_th block row of A */
100049b5e25fSSatish Balay     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
100149b5e25fSSatish Balay     ib = aj + *ai;
1002831a3094SHong Zhang     jmin = 0;
10037fbae186SHong Zhang     if (*ib == i){     /* (diag of A)*x */
100449b5e25fSSatish Balay       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
100549b5e25fSSatish Balay       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
100649b5e25fSSatish Balay       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
100749b5e25fSSatish Balay       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
100849b5e25fSSatish Balay       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
100949b5e25fSSatish Balay       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
101049b5e25fSSatish Balay       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1011831a3094SHong Zhang       v += 49; jmin++;
10127fbae186SHong Zhang     }
1013831a3094SHong Zhang     for (j=jmin; j<n; j++) {
101449b5e25fSSatish Balay       /* (strict lower triangular part of A)*x  */
101549b5e25fSSatish Balay       cval       = ib[j]*7;
101649b5e25fSSatish Balay       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
101749b5e25fSSatish Balay       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
101849b5e25fSSatish Balay       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
101949b5e25fSSatish Balay       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
102049b5e25fSSatish Balay       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
102149b5e25fSSatish Balay       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
102249b5e25fSSatish Balay       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
102349b5e25fSSatish Balay       /* (strict upper triangular part of A)*x  */
102449b5e25fSSatish Balay       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
102549b5e25fSSatish Balay       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
102649b5e25fSSatish Balay       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
102749b5e25fSSatish Balay       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
102849b5e25fSSatish Balay       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
102949b5e25fSSatish Balay       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
103049b5e25fSSatish Balay       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
103149b5e25fSSatish Balay       v  += 49;
103249b5e25fSSatish Balay     }
103349b5e25fSSatish Balay     xb +=7; ai++;
103449b5e25fSSatish Balay   }
103549b5e25fSSatish Balay 
10361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1037bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
103849b5e25fSSatish Balay 
1039899cda47SBarry Smith   ierr = PetscLogFlops(98*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
104049b5e25fSSatish Balay   PetscFunctionReturn(0);
104149b5e25fSSatish Balay }
104249b5e25fSSatish Balay 
10434a2ae208SSatish Balay #undef __FUNCT__
10444a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N"
1045dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
104649b5e25fSSatish Balay {
104749b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1048bba805e6SBarry Smith   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1049066653e3SSatish Balay   MatScalar      *v;
1050dfbe8321SBarry Smith   PetscErrorCode ierr;
1051899cda47SBarry Smith   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
105249b5e25fSSatish Balay 
105349b5e25fSSatish Balay   PetscFunctionBegin;
10546a65c766SHong Zhang   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
10551ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x;
10561ebc52fbSHong Zhang   ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z;
105749b5e25fSSatish Balay 
105849b5e25fSSatish Balay   aj   = a->j;
105949b5e25fSSatish Balay   v    = a->a;
106049b5e25fSSatish Balay   ii   = a->i;
106149b5e25fSSatish Balay 
106249b5e25fSSatish Balay   if (!a->mult_work) {
1063899cda47SBarry Smith     ierr = PetscMalloc((A->rmap.n+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
106449b5e25fSSatish Balay   }
106549b5e25fSSatish Balay   work = a->mult_work;
106649b5e25fSSatish Balay 
106749b5e25fSSatish Balay 
106849b5e25fSSatish Balay   for (i=0; i<mbs; i++) {
106949b5e25fSSatish Balay     n     = ii[1] - ii[0]; ncols = n*bs;
107049b5e25fSSatish Balay     workt = work; idx=aj+ii[0];
107149b5e25fSSatish Balay 
107249b5e25fSSatish Balay     /* upper triangular part */
107349b5e25fSSatish Balay     for (j=0; j<n; j++) {
107449b5e25fSSatish Balay       xb = x_ptr + bs*(*idx++);
107549b5e25fSSatish Balay       for (k=0; k<bs; k++) workt[k] = xb[k];
107649b5e25fSSatish Balay       workt += bs;
107749b5e25fSSatish Balay     }
107849b5e25fSSatish Balay     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
107949b5e25fSSatish Balay     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
108049b5e25fSSatish Balay 
108149b5e25fSSatish Balay     /* strict lower triangular part */
1082831a3094SHong Zhang     idx = aj+ii[0];
1083831a3094SHong Zhang     if (*idx == i){
108496b9376eSHong Zhang       ncols -= bs; v += bs2; idx++; n--;
1085831a3094SHong Zhang     }
108649b5e25fSSatish Balay     if (ncols > 0){
108749b5e25fSSatish Balay       workt = work;
108887828ca2SBarry Smith       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1089831a3094SHong Zhang       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1090831a3094SHong Zhang       for (j=0; j<n; j++) {
1091831a3094SHong Zhang         zb = z_ptr + bs*(*idx++);
109249b5e25fSSatish Balay         for (k=0; k<bs; k++) zb[k] += workt[k] ;
109349b5e25fSSatish Balay         workt += bs;
109449b5e25fSSatish Balay       }
109549b5e25fSSatish Balay     }
109649b5e25fSSatish Balay 
109749b5e25fSSatish Balay     x += bs; v += n*bs2; z += bs; ii++;
109849b5e25fSSatish Balay   }
109949b5e25fSSatish Balay 
11001ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1101bba805e6SBarry Smith   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
110249b5e25fSSatish Balay 
1103899cda47SBarry Smith   ierr = PetscLogFlops(2*(a->nz*2 - A->rmap.n));CHKERRQ(ierr);
110449b5e25fSSatish Balay   PetscFunctionReturn(0);
110549b5e25fSSatish Balay }
110649b5e25fSSatish Balay 
11074a2ae208SSatish Balay #undef __FUNCT__
11084a2ae208SSatish Balay #define __FUNCT__ "MatScale_SeqSBAIJ"
1109f4df32b1SMatthew Knepley PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
111049b5e25fSSatish Balay {
111149b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1112f4df32b1SMatthew Knepley   PetscScalar oalpha = alpha;
11134ce68768SBarry Smith   PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;
1114efee365bSSatish Balay   PetscErrorCode ierr;
111549b5e25fSSatish Balay 
111649b5e25fSSatish Balay   PetscFunctionBegin;
1117f4df32b1SMatthew Knepley   BLASscal_(&totalnz,&oalpha,a->a,&one);
1118efee365bSSatish Balay   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
111949b5e25fSSatish Balay   PetscFunctionReturn(0);
112049b5e25fSSatish Balay }
112149b5e25fSSatish Balay 
11224a2ae208SSatish Balay #undef __FUNCT__
11234a2ae208SSatish Balay #define __FUNCT__ "MatNorm_SeqSBAIJ"
1124dfbe8321SBarry Smith PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
112549b5e25fSSatish Balay {
112649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
112749b5e25fSSatish Balay   MatScalar      *v = a->a;
112849b5e25fSSatish Balay   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1129899cda47SBarry Smith   PetscInt       i,j,k,bs = A->rmap.bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
11306849ba73SBarry Smith   PetscErrorCode ierr;
113113f74950SBarry Smith   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
113249b5e25fSSatish Balay 
113349b5e25fSSatish Balay   PetscFunctionBegin;
113449b5e25fSSatish Balay   if (type == NORM_FROBENIUS) {
113549b5e25fSSatish Balay     for (k=0; k<mbs; k++){
113649b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
1137831a3094SHong Zhang       col  = aj + jmin;
1138831a3094SHong Zhang       if (*col == k){         /* diagonal block */
113949b5e25fSSatish Balay         for (i=0; i<bs2; i++){
114049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
114149b5e25fSSatish Balay           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
114249b5e25fSSatish Balay #else
114349b5e25fSSatish Balay           sum_diag += (*v)*(*v); v++;
114449b5e25fSSatish Balay #endif
114549b5e25fSSatish Balay         }
1146831a3094SHong Zhang         jmin++;
1147831a3094SHong Zhang       }
1148831a3094SHong Zhang       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
114949b5e25fSSatish Balay         for (i=0; i<bs2; i++){
115049b5e25fSSatish Balay #if defined(PETSC_USE_COMPLEX)
115149b5e25fSSatish Balay           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
115249b5e25fSSatish Balay #else
115349b5e25fSSatish Balay           sum_off += (*v)*(*v); v++;
115449b5e25fSSatish Balay #endif
115549b5e25fSSatish Balay         }
115649b5e25fSSatish Balay       }
115749b5e25fSSatish Balay     }
115849b5e25fSSatish Balay     *norm = sqrt(sum_diag + 2*sum_off);
11590b8dc8d2SHong Zhang   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
11600b8dc8d2SHong Zhang     ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);CHKERRQ(ierr);
11610b8dc8d2SHong Zhang     jl   = il + mbs;
11620b8dc8d2SHong Zhang     sum  = (PetscReal*)(jl + mbs);
11630b8dc8d2SHong Zhang     for (i=0; i<mbs; i++) jl[i] = mbs;
11640b8dc8d2SHong Zhang     il[0] = 0;
116549b5e25fSSatish Balay 
116649b5e25fSSatish Balay     *norm = 0.0;
116749b5e25fSSatish Balay     for (k=0; k<mbs; k++) { /* k_th block row */
116849b5e25fSSatish Balay       for (j=0; j<bs; j++) sum[j]=0.0;
116949b5e25fSSatish Balay       /*-- col sum --*/
117049b5e25fSSatish Balay       i = jl[k]; /* first |A(i,k)| to be added */
1171831a3094SHong Zhang       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1172831a3094SHong Zhang                   at step k */
117349b5e25fSSatish Balay       while (i<mbs){
117449b5e25fSSatish Balay         nexti = jl[i];  /* next block row to be added */
117549b5e25fSSatish Balay         ik    = il[i];  /* block index of A(i,k) in the array a */
117649b5e25fSSatish Balay         for (j=0; j<bs; j++){
117749b5e25fSSatish Balay           v = a->a + ik*bs2 + j*bs;
117849b5e25fSSatish Balay           for (k1=0; k1<bs; k1++) {
117949b5e25fSSatish Balay             sum[j] += PetscAbsScalar(*v); v++;
118049b5e25fSSatish Balay           }
118149b5e25fSSatish Balay         }
118249b5e25fSSatish Balay         /* update il, jl */
1183831a3094SHong Zhang         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1184831a3094SHong Zhang         jmax = a->i[i+1];
118549b5e25fSSatish Balay         if (jmin < jmax){
118649b5e25fSSatish Balay           il[i] = jmin;
118749b5e25fSSatish Balay           j   = a->j[jmin];
118849b5e25fSSatish Balay           jl[i] = jl[j]; jl[j]=i;
118949b5e25fSSatish Balay         }
119049b5e25fSSatish Balay         i = nexti;
119149b5e25fSSatish Balay       }
119249b5e25fSSatish Balay       /*-- row sum --*/
119349b5e25fSSatish Balay       jmin = a->i[k]; jmax = a->i[k+1];
119449b5e25fSSatish Balay       for (i=jmin; i<jmax; i++) {
119549b5e25fSSatish Balay         for (j=0; j<bs; j++){
119649b5e25fSSatish Balay           v = a->a + i*bs2 + j;
119749b5e25fSSatish Balay           for (k1=0; k1<bs; k1++){
11980b8dc8d2SHong Zhang             sum[j] += PetscAbsScalar(*v); v += bs;
119949b5e25fSSatish Balay           }
120049b5e25fSSatish Balay         }
120149b5e25fSSatish Balay       }
120249b5e25fSSatish Balay       /* add k_th block row to il, jl */
1203831a3094SHong Zhang       col = aj+jmin;
1204831a3094SHong Zhang       if (*col == k) jmin++;
120549b5e25fSSatish Balay       if (jmin < jmax){
120649b5e25fSSatish Balay         il[k] = jmin;
12070b8dc8d2SHong Zhang         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
120849b5e25fSSatish Balay       }
120949b5e25fSSatish Balay       for (j=0; j<bs; j++){
121049b5e25fSSatish Balay         if (sum[j] > *norm) *norm = sum[j];
121149b5e25fSSatish Balay       }
121249b5e25fSSatish Balay     }
121349b5e25fSSatish Balay     ierr = PetscFree(il);CHKERRQ(ierr);
121449b5e25fSSatish Balay   } else {
1215347d480fSBarry Smith     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
121649b5e25fSSatish Balay   }
121749b5e25fSSatish Balay   PetscFunctionReturn(0);
121849b5e25fSSatish Balay }
121949b5e25fSSatish Balay 
12204a2ae208SSatish Balay #undef __FUNCT__
12214a2ae208SSatish Balay #define __FUNCT__ "MatEqual_SeqSBAIJ"
1222dfbe8321SBarry Smith PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
122349b5e25fSSatish Balay {
122449b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1225dfbe8321SBarry Smith   PetscErrorCode ierr;
122649b5e25fSSatish Balay 
122749b5e25fSSatish Balay   PetscFunctionBegin;
122849b5e25fSSatish Balay 
122949b5e25fSSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1230899cda47SBarry Smith   if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1231ef511fbeSHong Zhang     *flg = PETSC_FALSE;
1232ef511fbeSHong Zhang     PetscFunctionReturn(0);
123349b5e25fSSatish Balay   }
123449b5e25fSSatish Balay 
123549b5e25fSSatish Balay   /* if the a->i are the same */
123613f74950SBarry Smith   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1237abc0a331SBarry Smith   if (!*flg) {
123849b5e25fSSatish Balay     PetscFunctionReturn(0);
123949b5e25fSSatish Balay   }
124049b5e25fSSatish Balay 
124149b5e25fSSatish Balay   /* if a->j are the same */
124213f74950SBarry Smith   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1243abc0a331SBarry Smith   if (!*flg) {
124449b5e25fSSatish Balay     PetscFunctionReturn(0);
124549b5e25fSSatish Balay   }
124649b5e25fSSatish Balay   /* if a->a are the same */
1247899cda47SBarry Smith   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(A->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1248935af2e7SHong Zhang   PetscFunctionReturn(0);
124949b5e25fSSatish Balay }
125049b5e25fSSatish Balay 
12514a2ae208SSatish Balay #undef __FUNCT__
12524a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ"
1253dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
125449b5e25fSSatish Balay {
125549b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1256dfbe8321SBarry Smith   PetscErrorCode ierr;
125713f74950SBarry Smith   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
125887828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
125949b5e25fSSatish Balay   MatScalar      *aa,*aa_j;
126049b5e25fSSatish Balay 
126149b5e25fSSatish Balay   PetscFunctionBegin;
1262899cda47SBarry Smith   bs   = A->rmap.bs;
126382799104SHong Zhang   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
126482799104SHong Zhang 
126549b5e25fSSatish Balay   aa   = a->a;
126649b5e25fSSatish Balay   ai   = a->i;
126749b5e25fSSatish Balay   aj   = a->j;
126849b5e25fSSatish Balay   ambs = a->mbs;
126949b5e25fSSatish Balay   bs2  = a->bs2;
127049b5e25fSSatish Balay 
12712dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
12721ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
127349b5e25fSSatish Balay   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1274899cda47SBarry Smith   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
127549b5e25fSSatish Balay   for (i=0; i<ambs; i++) {
127649b5e25fSSatish Balay     j=ai[i];
127749b5e25fSSatish Balay     if (aj[j] == i) {             /* if this is a diagonal element */
127849b5e25fSSatish Balay       row  = i*bs;
127949b5e25fSSatish Balay       aa_j = aa + j*bs2;
128082799104SHong Zhang       if (A->factor && bs==1){
128182799104SHong Zhang         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
128282799104SHong Zhang       } else {
128349b5e25fSSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
128449b5e25fSSatish Balay       }
128549b5e25fSSatish Balay     }
128682799104SHong Zhang   }
128782799104SHong Zhang 
12881ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
128949b5e25fSSatish Balay   PetscFunctionReturn(0);
129049b5e25fSSatish Balay }
129149b5e25fSSatish Balay 
12924a2ae208SSatish Balay #undef __FUNCT__
12934a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ"
1294dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
129549b5e25fSSatish Balay {
129649b5e25fSSatish Balay   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
12975e90f9d9SHong Zhang   PetscScalar    *l,x,*li,*ri;
129849b5e25fSSatish Balay   MatScalar      *aa,*v;
1299dfbe8321SBarry Smith   PetscErrorCode ierr;
13005e90f9d9SHong Zhang   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1301b3bf805bSHong Zhang   PetscTruth     flg;
130249b5e25fSSatish Balay 
130349b5e25fSSatish Balay   PetscFunctionBegin;
1304b3bf805bSHong Zhang   if (ll != rr){
1305b3bf805bSHong Zhang     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1306b3bf805bSHong Zhang     if (!flg)
1307b3bf805bSHong Zhang       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1308b3bf805bSHong Zhang   }
1309b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
131049b5e25fSSatish Balay   ai  = a->i;
131149b5e25fSSatish Balay   aj  = a->j;
131249b5e25fSSatish Balay   aa  = a->a;
1313899cda47SBarry Smith   m   = A->rmap.N;
1314899cda47SBarry Smith   bs  = A->rmap.bs;
131549b5e25fSSatish Balay   mbs = a->mbs;
131649b5e25fSSatish Balay   bs2 = a->bs2;
131749b5e25fSSatish Balay 
13181ebc52fbSHong Zhang   ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
131949b5e25fSSatish Balay   ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1320347d480fSBarry Smith   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
132149b5e25fSSatish Balay   for (i=0; i<mbs; i++) { /* for each block row */
132249b5e25fSSatish Balay     M  = ai[i+1] - ai[i];
132349b5e25fSSatish Balay     li = l + i*bs;
132449b5e25fSSatish Balay     v  = aa + bs2*ai[i];
132549b5e25fSSatish Balay     for (j=0; j<M; j++) { /* for each block */
132649b5e25fSSatish Balay       ri = l + bs*aj[ai[i]+j];
13275e90f9d9SHong Zhang       for (k=0; k<bs; k++) {
132849b5e25fSSatish Balay         x = ri[k];
132949b5e25fSSatish Balay         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
133049b5e25fSSatish Balay       }
133149b5e25fSSatish Balay     }
133249b5e25fSSatish Balay   }
13331ebc52fbSHong Zhang   ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1334efee365bSSatish Balay   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
133549b5e25fSSatish Balay   PetscFunctionReturn(0);
133649b5e25fSSatish Balay }
133749b5e25fSSatish Balay 
13384a2ae208SSatish Balay #undef __FUNCT__
13394a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_SeqSBAIJ"
1340dfbe8321SBarry Smith PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
134149b5e25fSSatish Balay {
134249b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
134349b5e25fSSatish Balay 
134449b5e25fSSatish Balay   PetscFunctionBegin;
1345899cda47SBarry Smith   info->rows_global    = (double)A->rmap.N;
1346899cda47SBarry Smith   info->columns_global = (double)A->rmap.N;
1347899cda47SBarry Smith   info->rows_local     = (double)A->rmap.N;
1348899cda47SBarry Smith   info->columns_local  = (double)A->rmap.N;
134949b5e25fSSatish Balay   info->block_size     = a->bs2;
13506c6c5352SBarry Smith   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
13516c6c5352SBarry Smith   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
135249b5e25fSSatish Balay   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
135349b5e25fSSatish Balay   info->assemblies   = A->num_ass;
135449b5e25fSSatish Balay   info->mallocs      = a->reallocs;
135549b5e25fSSatish Balay   info->memory       = A->mem;
135649b5e25fSSatish Balay   if (A->factor) {
135749b5e25fSSatish Balay     info->fill_ratio_given  = A->info.fill_ratio_given;
135849b5e25fSSatish Balay     info->fill_ratio_needed = A->info.fill_ratio_needed;
135949b5e25fSSatish Balay     info->factor_mallocs    = A->info.factor_mallocs;
136049b5e25fSSatish Balay   } else {
136149b5e25fSSatish Balay     info->fill_ratio_given  = 0;
136249b5e25fSSatish Balay     info->fill_ratio_needed = 0;
136349b5e25fSSatish Balay     info->factor_mallocs    = 0;
136449b5e25fSSatish Balay   }
136549b5e25fSSatish Balay   PetscFunctionReturn(0);
136649b5e25fSSatish Balay }
136749b5e25fSSatish Balay 
136849b5e25fSSatish Balay 
13694a2ae208SSatish Balay #undef __FUNCT__
13704a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_SeqSBAIJ"
1371dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
137249b5e25fSSatish Balay {
137349b5e25fSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1374dfbe8321SBarry Smith   PetscErrorCode ierr;
137549b5e25fSSatish Balay 
137649b5e25fSSatish Balay   PetscFunctionBegin;
137749b5e25fSSatish Balay   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
137849b5e25fSSatish Balay   PetscFunctionReturn(0);
137949b5e25fSSatish Balay }
1380dc354874SHong Zhang 
13814a2ae208SSatish Balay #undef __FUNCT__
13824a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqSBAIJ"
1383dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1384dc354874SHong Zhang {
1385dc354874SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1386dfbe8321SBarry Smith   PetscErrorCode ierr;
138713f74950SBarry Smith   PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1388c3fca9a7SHong Zhang   PetscReal    atmp;
1389273d9f13SBarry Smith   MatScalar    *aa;
139087828ca2SBarry Smith   PetscScalar  zero = 0.0,*x;
139113f74950SBarry Smith   PetscInt          ncols,brow,bcol,krow,kcol;
1392dc354874SHong Zhang 
1393dc354874SHong Zhang   PetscFunctionBegin;
1394dc354874SHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1395899cda47SBarry Smith   bs   = A->rmap.bs;
1396dc354874SHong Zhang   aa   = a->a;
1397dc354874SHong Zhang   ai   = a->i;
1398dc354874SHong Zhang   aj   = a->j;
139944117c81SHong Zhang   mbs = a->mbs;
1400dc354874SHong Zhang 
14012dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
14021ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1403dc354874SHong Zhang   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1404899cda47SBarry Smith   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
140544117c81SHong Zhang   for (i=0; i<mbs; i++) {
1406d0f6400bSHong Zhang     ncols = ai[1] - ai[0]; ai++;
1407d0f6400bSHong Zhang     brow  = bs*i;
140844117c81SHong Zhang     for (j=0; j<ncols; j++){
1409d0f6400bSHong Zhang       bcol = bs*(*aj);
141044117c81SHong Zhang       for (kcol=0; kcol<bs; kcol++){
1411d0f6400bSHong Zhang         col = bcol + kcol;      /* col index */
141244117c81SHong Zhang         for (krow=0; krow<bs; krow++){
1413d0f6400bSHong Zhang           atmp = PetscAbsScalar(*aa); aa++;
1414d0f6400bSHong Zhang           row = brow + krow;    /* row index */
1415c3fca9a7SHong Zhang           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1416c3fca9a7SHong Zhang           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
141744117c81SHong Zhang         }
141844117c81SHong Zhang       }
1419d0f6400bSHong Zhang       aj++;
1420dc354874SHong Zhang     }
1421dc354874SHong Zhang   }
14221ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1423dc354874SHong Zhang   PetscFunctionReturn(0);
1424dc354874SHong Zhang }
1425